diff options
| author | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 14:34:22 +0100 | 
|---|---|---|
| committer | Charlie Stanton <charlie@shtanton.xyz> | 2023-04-19 14:34:22 +0100 | 
| commit | 10f847acc7087317b0fbe20b7cf3307a0fafab8a (patch) | |
| tree | 4abf2f4009fcac55013672e841b2f9d3a2b2fb52 | |
| parent | 5089fe689f17a3489b6be76588b8fc7f93d70e55 (diff) | |
| download | stred-go-10f847acc7087317b0fbe20b7cf3307a0fafab8a.tar | |
Changes the parsing API for subex to be better suited to use as part of a larger program
| -rw-r--r-- | subex/lex.go | 16 | ||||
| -rw-r--r-- | subex/main.go | 7 | ||||
| -rw-r--r-- | subex/parse.go | 91 | 
3 files changed, 60 insertions, 54 deletions
| diff --git a/subex/lex.go b/subex/lex.go index f020b23..74bf370 100644 --- a/subex/lex.go +++ b/subex/lex.go @@ -5,11 +5,11 @@ import (  )  const eof rune = -1 -type RuneReader struct { +type StringRuneReader struct {  	input string  	pos, width int  } -func (l *RuneReader) next() rune { +func (l *StringRuneReader) Next() rune {  	if l.pos >= len(l.input) {  		l.width = 0  		return eof @@ -19,16 +19,6 @@ func (l *RuneReader) next() rune {  	l.pos += l.width  	return r  } -func (l *RuneReader) accept(chars string) bool { -	r := l.next() -	for _, char := range chars { -		if char == r { -			return true -		} -	} -	l.rewind() -	return false -} -func (l *RuneReader) rewind() { +func (l *StringRuneReader) Rewind() {  	l.pos -= l.width  } diff --git a/subex/main.go b/subex/main.go index 091625b..9824f10 100644 --- a/subex/main.go +++ b/subex/main.go @@ -131,7 +131,12 @@ func Main() {  		panic("Expected: program [subex]")  	}  	program := os.Args[1] -	ast := Parse(program) +	reader := &StringRuneReader { +		input: program, +		pos: 0, +		width: 0, +	} +	ast := Parse(reader)  	transducer := CompileTransducer(ast)  	stdin := bufio.NewReader(os.Stdin); diff --git a/subex/parse.go b/subex/parse.go index d6ef995..e6efc2e 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -4,8 +4,24 @@ import (  	"main/walk"  ) -func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { -	switch l.next() { +type RuneReader interface { +	Next() rune +	Rewind() +} + +func accept(l RuneReader, chars string) bool { +	r := l.Next() +	for _, char := range chars { +		if char == r { +			return true +		} +	} +	l.Rewind() +	return false +} + +func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { +	switch l.Next() {  		case '(':  			return ifLeft  		case ')': @@ -16,7 +32,7 @@ func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom  }  // Having just read termType, read in a bracket and return the corresponding Atom -func 
parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { +func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom {  	switch termType {  		case '@':  			return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) @@ -34,41 +50,41 @@ func charIsDigit(c rune) bool {  }  // Parse a positive integer, reads digits 0-9 and stops at the first non-digit -func parseInt(l *RuneReader) (output int) { +func parseInt(l RuneReader) (output int) {  	for { -		char := l.next() +		char := l.Next()  		if charIsDigit(char) {  			output = output * 10 + int(char - '0')  		} else {  			break  		}  	} -	l.rewind() +	l.Rewind()  	return output  }  // Having just read {, read in and parse the range contents -func parseRepeatRange(l *RuneReader) (output []ConvexRange) { +func parseRepeatRange(l RuneReader) (output []ConvexRange) {  	loop: for {  		var start, end int -		char := l.next() -		l.rewind() +		char := l.Next() +		l.Rewind()  		if char == '-' {  			start = -1  		} else {  			start = parseInt(l)  		} -		switch l.next() { +		switch l.Next() {  			case ',':  				output = append(output, ConvexRange{start, start})  				continue loop  			case '-': -				char := l.next() +				char := l.Next()  				if charIsDigit(char) { -					l.rewind() +					l.Rewind()  					end = parseInt(l)  				} else { -					l.rewind() +					l.Rewind()  					end = -1  				}  			case '}': @@ -77,7 +93,7 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {  			default:  				panic("Invalid character in repeat specifier")  		} -		switch l.next() { +		switch l.Next() {  			case ',':  				output = append(output, ConvexRange{start, end})  				continue loop @@ -91,17 +107,17 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {  	return output  } -func parseReplacement(l *RuneReader) (output []OutputContent) { +func parseReplacement(l RuneReader) (output []OutputContent) {  	// TODO escaping  	loop: for { -		r := l.next() +		r := l.Next()  		switch r {  			case eof:  				
panic("Missing closing \"")  			case '"':  				break loop  			case '$': -				slot := l.next() +				slot := l.Next()  				if slot == eof {  					panic("Missing slot character")  				} @@ -116,13 +132,13 @@ func parseReplacement(l *RuneReader) (output []OutputContent) {  }  // Parse the contents of a range subex [] into a map -func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { +func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom {  	// TODO escaping  	parts := make(map[walk.Atom]walk.Atom)  	var froms []walk.Atom  	var hasTo bool  	for { -		fromsStart := l.next() +		fromsStart := l.Next()  		if fromsStart == ']' {  			hasTo = false  			break @@ -136,10 +152,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {  				continue  			}  		} -		if l.accept("-") { -			fromsEnd := l.next() +		if accept(l, "-") { +			fromsEnd := l.Next()  			if fromsEnd == ']' || fromsEnd == '=' { -				l.rewind() +				l.Rewind()  				fromsEnd = fromsStart  			}  			for i := fromsStart; i <= fromsEnd; i += 1 { @@ -156,7 +172,7 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {  	var tos []walk.Atom  	if hasTo {  		for { -			tosStart := l.next() +			tosStart := l.Next()  			if tosStart == ']' {  				break  			} else { @@ -166,10 +182,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {  					continue  				}  			} -			if l.accept("-") { -				tosEnd := l.next() +			if accept(l, "-") { +				tosEnd := l.Next()  				if tosEnd == ']' { -					l.rewind() +					l.Rewind()  					tosEnd = tosStart  				}  				for i := tosStart; i <= tosEnd; i += 1 { @@ -192,22 +208,22 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {  	return parts  } -func parseSubex(l *RuneReader, minPower int) SubexAST { +func parseSubex(l RuneReader, minPower int) SubexAST {  	var lhs SubexAST -	r := l.next() +	r := l.Next()  	switch r {  		case eof:  			return nil  		case '(':  			lhs = parseSubex(l, 0) -			if !l.accept(")") { +			if !accept(l, ")") {  				
panic("Missing matching )")  			}  		case '[':  			rangeParts := parseRangeSubex(l)  			lhs = SubexASTRange {rangeParts}  		case ')', '|', ';', '{', '+', '$': -			l.rewind() +			l.Rewind()  			return nil  		case '"':  			replacement := parseReplacement(l) @@ -227,7 +243,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  				continue loop  			}  		} -		r := l.next() +		r := l.Next()  		switch {  			case r == '{' && minPower <= 8:  				lhs = SubexASTRepeat { @@ -245,7 +261,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  			case r == '!' && minPower <= 8:  				lhs = SubexASTNot {lhs}  			case r == '$' && minPower <= 8: -				slot := l.next() +				slot := l.Next()  				if slot == eof {  					panic("Missing slot character")  				} @@ -269,18 +285,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  					delimiter: rhs,  				}  			default: -				l.rewind() +				l.Rewind()  				break loop  		}  	}  	return lhs  } -func Parse(input string) SubexAST { -	l := RuneReader { -		input: input, -		pos: 0, -		width: 0, -	} -	return parseSubex(&l, 0) +func Parse(l RuneReader) SubexAST { +	return parseSubex(l, 0)  } | 
