From 0a8690993d572a50b95dd4f1c1903ed00ddb9c2b Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 21 Sep 2022 21:05:34 +0100 Subject: Initial commit Parses and executes substitute expressions (subexes) So far subex has the following operations: - Concatenation of a and b with ab - Or with | - Repeat maximally with * - Repeat minimally with - - Copy a specific character 'a' - Copy any character '.' - Store text matching a regex into slot 's': `$s(regex)` - Output text in "" including loading from slots with '$' Regexes support all the same operations as subexes minus storing and outputting This first implementation gives very little thought to efficiency Example: ./main 'according to all known laws of aviation' '$1(.-)$m(( .* )| ).*"$m$1"' This swaps the first and last words of the input string --- main/parse.go | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 main/parse.go (limited to 'main/parse.go') diff --git a/main/parse.go b/main/parse.go new file mode 100644 index 0000000..a9bd4b5 --- /dev/null +++ b/main/parse.go @@ -0,0 +1,141 @@ +package main + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { + loop: for { + r := l.next() + switch r { + case eof: + panic("Missing closing \"") + case '"': + break loop + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + output = append(output, TransducerReplacementLoad(slot)) + default: + output = append(output, TransducerReplacementRune(r)) + } + } + return output +} + +func parseRegex(l *RuneReader, minPower int) RegexAST { + var lhs RegexAST + r := l.next() + switch r { + case eof: + return nil + case ')', '*', '-', '|': + l.rewind() + return nil + case '(': + lhs = parseRegex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case '.': + lhs = RegexASTAny{} + default: + lhs = RegexASTRune(r) + } + loop: for { + if minPower <= 0 { + next := parseRegex(l, 1) + if next != nil { + lhs = RegexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = RegexASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = RegexASTMinimise{lhs} + case r == '|' && minPower <= 2: + rhs := parseRegex(l, 3) + if rhs == nil { + panic("Missing regex after |") + } + lhs = RegexASTOr{lhs, rhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { + var lhs SubexAST + r := l.next() + switch r { + case eof: + return nil + case '(': + lhs = parseSubex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case ')', '*', '-', '|': + l.rewind() + return nil + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + match := parseRegex(l, 100) + if match == nil { + panic("Missing regex for store") + } + lhs = SubexASTStore{ + match: match, + slot: slot, + } + case '"': + replacement := parseReplacement(l) + lhs = SubexASTOutput{replacement} + case '.': + lhs = SubexASTCopyAny{} + default: + lhs = SubexASTCopyRune(r) + } + loop: for { + if minPower <= 0 { + next := parseSubex(l, 1) + if next != nil { + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = SubexASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = SubexASTMinimise{lhs} + case r == '|' && minPower <= 2: + rhs := parseSubex(l, 3) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parse(input string) SubexAST { + l := RuneReader { + input: input, + pos: 0, + width: 0, + } + return parseSubex(&l, 0) +} -- cgit v1.2.3