diff options
| author | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 | 
|---|---|---|
| committer | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 | 
| commit | 0a8690993d572a50b95dd4f1c1903ed00ddb9c2b (patch) | |
| tree | 2ab207544c88ff19308e22c8b79c3ea349c97faa /main/parse.go | |
| download | subex-0a8690993d572a50b95dd4f1c1903ed00ddb9c2b.tar | |
Initial commit
Parses and executes substitute expressions (subexes)
So far subex has the following operations:
- Concatenation of a and b with ab
- Or with |
- Repeat maximally with *
- Repeat minimally with -
- Copy a specific character 'a'
- Copy any character '.'
- Store text matching a regex into slot 's': `$s(regex)`
- Output text in "" including loading from slots with '$'
Regexes support all the same operations as subexes minus storing and outputting
This first implementation gives very little thought to efficiency
Example:
./main 'according to all known laws of aviation' '$1(.-)$m(( .* )| ).*"$m$1"'
This swaps the first and last words of the input string
Diffstat (limited to 'main/parse.go')
| -rw-r--r-- | main/parse.go | 141 | 
1 files changed, 141 insertions, 0 deletions
| diff --git a/main/parse.go b/main/parse.go new file mode 100644 index 0000000..a9bd4b5 --- /dev/null +++ b/main/parse.go @@ -0,0 +1,141 @@ +package main + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { +	loop: for { +		r := l.next() +		switch r { +			case eof: +				panic("Missing closing \"") +			case '"': +				break loop +			case '$': +				slot := l.next() +				if slot == eof { +					panic("Missing slot character") +				} +				output = append(output, TransducerReplacementLoad(slot)) +			default: +				output = append(output, TransducerReplacementRune(r)) +		} +	} +	return output +} + +func parseRegex(l *RuneReader, minPower int) RegexAST { +	var lhs RegexAST +	r := l.next() +	switch r { +		case eof: +			return nil +		case ')', '*', '-', '|': +			l.rewind() +			return nil +		case '(': +			lhs = parseRegex(l, 0) +			if !l.accept(")") { +				panic("Missing matching )") +			} +		case '.': +			lhs = RegexASTAny{} +		default: +			lhs = RegexASTRune(r) +	} +	loop: for { +		if minPower <= 0 { +			next := parseRegex(l, 1) +			if next != nil { +				lhs = RegexASTConcat{lhs, next} +				continue loop +			} +		} +		r := l.next() +		switch { +			case r == '*' && minPower <= 4: +				lhs = RegexASTMaximise{lhs} +			case r == '-' && minPower <= 4: +				lhs = RegexASTMinimise{lhs} +			case r == '|' && minPower <= 2: +				rhs := parseRegex(l, 3) +				if rhs == nil { +					panic("Missing regex after |") +				} +				lhs = RegexASTOr{lhs, rhs} +			default: +				l.rewind() +				break loop +		} +	} +	return lhs +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { +	var lhs SubexAST +	r := l.next() +	switch r { +		case eof: +			return nil +		case '(': +			lhs = parseSubex(l, 0) +			if !l.accept(")") { +				panic("Missing matching )") +			} +		case ')', '*', '-', '|': +			l.rewind() +			return nil +		case '$': +			slot := l.next() +			if slot == eof { +				panic("Missing slot character") +			} +			match := parseRegex(l, 100) +			if match == nil { +				panic("Missing regex for store") +			} +			lhs = SubexASTStore{ +				match: match, +				slot: slot, +			} +		case '"': +			replacement := parseReplacement(l) +			lhs = SubexASTOutput{replacement} +		case '.': +			lhs = SubexASTCopyAny{} +		default: +			lhs = SubexASTCopyRune(r) +	} +	loop: for { +		if minPower <= 0 { +			next := parseSubex(l, 1) +			if next != nil { +				lhs = SubexASTConcat{lhs, next} +				continue loop +			} +		} +		r := l.next() +		switch { +			case r == '*' && minPower <= 4: +				lhs = SubexASTMaximise{lhs} +			case r == '-' && minPower <= 4: +				lhs = SubexASTMinimise{lhs} +			case r == '|' && minPower <= 2: +				rhs := parseSubex(l, 3) +				if rhs == nil { +					panic("Missing subex after |") +				} +				lhs = SubexASTOr{lhs, rhs} +			default: +				l.rewind() +				break loop +		} +	} +	return lhs +} + +func parse(input string) SubexAST { +	l := RuneReader { +		input: input, +		pos: 0, +		width: 0, +	} +	return parseSubex(&l, 0) +} | 
