diff options
author | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 |
---|---|---|
committer | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 |
commit | 0a8690993d572a50b95dd4f1c1903ed00ddb9c2b (patch) | |
tree | 2ab207544c88ff19308e22c8b79c3ea349c97faa /main/parse.go | |
download | subex-0a8690993d572a50b95dd4f1c1903ed00ddb9c2b.tar |
Initial commit
Parses and executes substitute expressions (subexes)
So far subex has the following operations:
- Concatenation of a and b with ab
- Or with |
- Repeat maximally with *
- Repeat minimally with -
- Copy a specific character 'a'
- Copy any character '.'
- Store text matching a regex into slot 's': `$s(regex)`
- Output text in "" including loading from slots with '$'
Regexes support all the same operations as subexes minus storing and outputting
This first implementation gives very little thought to efficiency
Example:
./main 'according to all known laws of aviation' '$1(.-)$m(( .* )| ).*"$m$1"'
This swaps the first and last words of the input string
Diffstat (limited to 'main/parse.go')
-rw-r--r-- | main/parse.go | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/main/parse.go b/main/parse.go new file mode 100644 index 0000000..a9bd4b5 --- /dev/null +++ b/main/parse.go @@ -0,0 +1,141 @@ +package main + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { + loop: for { + r := l.next() + switch r { + case eof: + panic("Missing closing \"") + case '"': + break loop + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + output = append(output, TransducerReplacementLoad(slot)) + default: + output = append(output, TransducerReplacementRune(r)) + } + } + return output +} + +func parseRegex(l *RuneReader, minPower int) RegexAST { + var lhs RegexAST + r := l.next() + switch r { + case eof: + return nil + case ')', '*', '-', '|': + l.rewind() + return nil + case '(': + lhs = parseRegex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case '.': + lhs = RegexASTAny{} + default: + lhs = RegexASTRune(r) + } + loop: for { + if minPower <= 0 { + next := parseRegex(l, 1) + if next != nil { + lhs = RegexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = RegexASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = RegexASTMinimise{lhs} + case r == '|' && minPower <= 2: + rhs := parseRegex(l, 3) + if rhs == nil { + panic("Missing regex after |") + } + lhs = RegexASTOr{lhs, rhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { + var lhs SubexAST + r := l.next() + switch r { + case eof: + return nil + case '(': + lhs = parseSubex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case ')', '*', '-', '|': + l.rewind() + return nil + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + match := parseRegex(l, 100) + if match == nil { + panic("Missing regex for store") + } + lhs = SubexASTStore{ + match: match, + slot: slot, + } + case '"': + replacement := parseReplacement(l) + lhs = SubexASTOutput{replacement} + case '.': + lhs = SubexASTCopyAny{} + default: + lhs = SubexASTCopyRune(r) + } + loop: for { + if minPower <= 0 { + next := parseSubex(l, 1) + if next != nil { + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = SubexASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = SubexASTMinimise{lhs} + case r == '|' && minPower <= 2: + rhs := parseSubex(l, 3) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parse(input string) SubexAST { + l := RuneReader { + input: input, + pos: 0, + width: 0, + } + return parseSubex(&l, 0) +} |