diff options
| author | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 | 
|---|---|---|
| committer | Charlie Stanton <charlie@shtanton.xyz> | 2022-09-21 21:05:34 +0100 | 
| commit | 0a8690993d572a50b95dd4f1c1903ed00ddb9c2b (patch) | |
| tree | 2ab207544c88ff19308e22c8b79c3ea349c97faa /main | |
| download | subex-0a8690993d572a50b95dd4f1c1903ed00ddb9c2b.tar | |
Initial commit
Parses and executes substitute expressions (subexes)
So far subex has the following operations:
- Concatenation of a and b with ab
- Or with |
- Repeat maximally with *
- Repeat minimally with -
- Copy a specific character 'a'
- Copy any character '.'
- Store text matching a regex into slot 's': `$s(regex)`
- Output text in "" including loading from slots with '$'
Regexes support all the same operations as subexes minus storing and outputting
This first implementation gives very little thought to efficiency
Example:
./main 'according to all known laws of aviation' '$1(.-)$m(( .* )| ).*"$m$1"'
This swaps the first and last words of the input string
Diffstat (limited to 'main')
| -rw-r--r-- | main/lex.go | 34 | ||||
| -rw-r--r-- | main/main.go | 86 | ||||
| -rw-r--r-- | main/parse.go | 141 | ||||
| -rw-r--r-- | main/regexast.go | 72 | ||||
| -rw-r--r-- | main/regexstate.go | 48 | ||||
| -rw-r--r-- | main/subexast.go | 117 | ||||
| -rw-r--r-- | main/subexstate.go | 123 | 
7 files changed, 621 insertions, 0 deletions
| diff --git a/main/lex.go b/main/lex.go new file mode 100644 index 0000000..c08f402 --- /dev/null +++ b/main/lex.go @@ -0,0 +1,34 @@ +package main + +import ( +	"unicode/utf8" +) + +const eof rune = -1 +type RuneReader struct { +	input string +	pos, width int +} +func (l *RuneReader) next() rune { +	if l.pos >= len(l.input) { +		l.width = 0 +		return eof +	} +	var r rune +	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) +	l.pos += l.width +	return r +} +func (l *RuneReader) accept(chars string) bool { +	r := l.next() +	for _, char := range chars { +		if char == r { +			return true +		} +	} +	l.rewind() +	return false +} +func (l *RuneReader) rewind() { +	l.pos -= l.width +} diff --git a/main/main.go b/main/main.go new file mode 100644 index 0000000..be43d90 --- /dev/null +++ b/main/main.go @@ -0,0 +1,86 @@ +package main + +import ( +	"os" +	"fmt" +) + +type TransducerOutput interface { +	build(Store) string +} + +type TransducerReplacementRune rune +func (replacement TransducerReplacementRune) build(store Store) string { +	return string(replacement) +} + +type TransducerReplacementLoad rune +func (replacement TransducerReplacementLoad) build(store Store) string { +	return store[rune(replacement)] +} + +type Store map[rune]string +func (store Store) clone() Store { +	newStore := make(Store) +	for key, val := range store { +		newStore[key] = val +	} +	return newStore +} + +func compileTransducer(transducerAst SubexAST) SubexState { +	return transducerAst.compileWith(SubexNoneState{}) +} + +type SubexBranch struct { +	store Store +	state SubexState +	output string +} +func (pair SubexBranch) eat(char rune) []SubexBranch { +	states := pair.state.eat(pair.store, char) +	for i := range states { +		states[i].output = pair.output + states[i].output +	} +	return states +} +func (pair SubexBranch) accepting() []string { +	return pair.state.accepting(pair.store) +} + +func runTransducer(transducer SubexState, input string) (output string, err bool) { +	states := []SubexBranch{{ +		state: transducer, +		output: "", +		store: make(Store), +	}} +	for _, char := range input { +		var newStates []SubexBranch +		for _, state := range states { +			newStates = append(newStates, state.eat(char)...) +		} +		states = newStates +	} +	for _, state := range states { +		outputEnds := state.accepting() +		for _, outputEnd := range outputEnds { +			return state.output + outputEnd, false +		} +	} +	return "", true +} + +func main() { +	if len(os.Args) != 3 { +		panic("Expected: program [input] [subex]") +	} +	input := os.Args[1] +	program := os.Args[2] +	ast := parse(program) +	transducer := compileTransducer(ast) +	output, err := runTransducer(transducer, input) +	if err { +		output = input +	} +	fmt.Println(output) +} diff --git a/main/parse.go b/main/parse.go new file mode 100644 index 0000000..a9bd4b5 --- /dev/null +++ b/main/parse.go @@ -0,0 +1,141 @@ +package main + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { +	loop: for { +		r := l.next() +		switch r { +			case eof: +				panic("Missing closing \"") +			case '"': +				break loop +			case '$': +				slot := l.next() +				if slot == eof { +					panic("Missing slot character") +				} +				output = append(output, TransducerReplacementLoad(slot)) +			default: +				output = append(output, TransducerReplacementRune(r)) +		} +	} +	return output +} + +func parseRegex(l *RuneReader, minPower int) RegexAST { +	var lhs RegexAST +	r := l.next() +	switch r { +		case eof: +			return nil +		case ')', '*', '-', '|': +			l.rewind() +			return nil +		case '(': +			lhs = parseRegex(l, 0) +			if !l.accept(")") { +				panic("Missing matching )") +			} +		case '.': +			lhs = RegexASTAny{} +		default: +			lhs = RegexASTRune(r) +	} +	loop: for { +		if minPower <= 0 { +			next := parseRegex(l, 1) +			if next != nil { +				lhs = RegexASTConcat{lhs, next} +				continue loop +			} +		} +		r := l.next() +		switch { +			case r == '*' && minPower <= 4: +				lhs = RegexASTMaximise{lhs} +			case r == '-' && minPower <= 4: +				lhs = RegexASTMinimise{lhs} +			case r == '|' && minPower <= 2: +				rhs := parseRegex(l, 3) +				if rhs == nil { +					panic("Missing regex after |") +				} +				lhs = RegexASTOr{lhs, rhs} +			default: +				l.rewind() +				break loop +		} +	} +	return lhs +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { +	var lhs SubexAST +	r := l.next() +	switch r { +		case eof: +			return nil +		case '(': +			lhs = parseSubex(l, 0) +			if !l.accept(")") { +				panic("Missing matching )") +			} +		case ')', '*', '-', '|': +			l.rewind() +			return nil +		case '$': +			slot := l.next() +			if slot == eof { +				panic("Missing slot character") +			} +			match := parseRegex(l, 100) +			if match == nil { +				panic("Missing regex for store") +			} +			lhs = SubexASTStore{ +				match: match, +				slot: slot, +			} +		case '"': +			replacement := parseReplacement(l) +			lhs = SubexASTOutput{replacement} +		case '.': +			lhs = SubexASTCopyAny{} +		default: +			lhs = SubexASTCopyRune(r) +	} +	loop: for { +		if minPower <= 0 { +			next := parseSubex(l, 1) +			if next != nil { +				lhs = SubexASTConcat{lhs, next} +				continue loop +			} +		} +		r := l.next() +		switch { +			case r == '*' && minPower <= 4: +				lhs = SubexASTMaximise{lhs} +			case r == '-' && minPower <= 4: +				lhs = SubexASTMinimise{lhs} +			case r == '|' && minPower <= 2: +				rhs := parseSubex(l, 3) +				if rhs == nil { +					panic("Missing subex after |") +				} +				lhs = SubexASTOr{lhs, rhs} +			default: +				l.rewind() +				break loop +		} +	} +	return lhs +} + +func parse(input string) SubexAST { +	l := RuneReader { +		input: input, +		pos: 0, +		width: 0, +	} +	return parseSubex(&l, 0) +} diff --git a/main/regexast.go b/main/regexast.go new file mode 100644 index 0000000..0aab053 --- /dev/null +++ b/main/regexast.go @@ -0,0 +1,72 @@ +package main + +import ( +	"fmt" +) + +type RegexAST interface { +	compileWith(next RegexState) RegexState +} + +type RegexASTRune rune +func (ast RegexASTRune) compileWith(next RegexState) RegexState { +	return RegexRuneState{ +		rune: rune(ast), +		next: next, +	} +} +func (ast RegexASTRune) String() string { +	return string(rune(ast)) +} + +type RegexASTAny struct {} +func (ast RegexASTAny) compileWith(next RegexState) RegexState { +	return RegexAnyState{next} +} +func (ast RegexASTAny) String() string { +	return "." +} + +type RegexASTConcat struct { +	first, second RegexAST +} +func (ast RegexASTConcat) compileWith(next RegexState) RegexState { +	return ast.first.compileWith(ast.second.compileWith(next)) +} +func (ast RegexASTConcat) String() string { +	return fmt.Sprintf("Concat{%v, %v}", ast.first, ast.second) +} + +type RegexASTOr struct { +	first, second RegexAST +} +func (ast RegexASTOr) compileWith(next RegexState) RegexState { +	return RegexGroupState{ +		ast.first.compileWith(next), +		ast.second.compileWith(next), +	} +} + +type RegexASTMaximise struct { +	content RegexAST +} +func (ast RegexASTMaximise) compileWith(next RegexState) RegexState { +	state := &RegexGroupState{ +		nil, +		next, +	} +	state.first = ast.content.compileWith(state) +	return state +} + +type RegexASTMinimise struct { +	content RegexAST +} +func (ast RegexASTMinimise) compileWith(next RegexState) RegexState { +	state := &RegexGroupState{ +		next, +		nil, +	} +	state.second = ast.content.compileWith(state) +	return state +} diff --git a/main/regexstate.go b/main/regexstate.go new file mode 100644 index 0000000..16d5581 --- /dev/null +++ b/main/regexstate.go @@ -0,0 +1,48 @@ +package main + +type RegexState interface { +	eat(char rune) []RegexState +	accepting() bool +} + +type RegexNoneState struct {} +func (state RegexNoneState) eat(char rune) []RegexState { +	return nil +} +func (state RegexNoneState) accepting() bool { +	return true +} + +type RegexAnyState struct { +	next RegexState +} +func (state RegexAnyState) eat(char rune) []RegexState { +	return []RegexState{state.next} +} +func (state RegexAnyState) accepting() bool { +	return false +} + +type RegexRuneState struct { +	rune rune +	next RegexState +} +func (state RegexRuneState) eat(char rune) []RegexState { +	if char == state.rune { +		return []RegexState{state.next} +	} +	return nil +} +func (state RegexRuneState) accepting() bool { +	return false +} + +type RegexGroupState struct { +	first, second RegexState +} +func (state RegexGroupState) eat(char rune) []RegexState { +	return append(state.first.eat(char), state.second.eat(char)...) +} +func (state RegexGroupState) accepting() bool { +	return state.first.accepting() || state.second.accepting() +} diff --git a/main/subexast.go b/main/subexast.go new file mode 100644 index 0000000..7e2f33c --- /dev/null +++ b/main/subexast.go @@ -0,0 +1,117 @@ +package main + +import ( +	"fmt" +) + +type SubexAST interface { +	compileWith(next SubexState) SubexState +} + +type SubexASTConcat struct { +	first, second SubexAST +} +func (ast SubexASTConcat) compileWith(next SubexState) SubexState { +	return ast.first.compileWith(ast.second.compileWith(next)) +} +func (ast SubexASTConcat) String() string { +	return fmt.Sprintf("(%v)(%v)", ast.first, ast.second) +} + +type SubexASTStore struct { +	match RegexAST +	slot rune +} +func (ast SubexASTStore) compileWith(next SubexState) SubexState { +	return SubexStoreState { +		match: ast.match.compileWith(RegexNoneState{}), +		slot: ast.slot, +		next: next, +	} +} +func (ast SubexASTStore) String() string { +	return fmt.Sprintf("$%c(%v)", ast.slot, ast.match) +} + +type SubexASTOr struct { +	first, second SubexAST +} +func (ast SubexASTOr) compileWith(next SubexState) SubexState { +	return SubexGroupState { +		ast.first.compileWith(next), +		ast.second.compileWith(next), +	} +} + +type SubexASTMaximise struct { +	content SubexAST +} +func (ast SubexASTMaximise) compileWith(next SubexState) SubexState { +	state := &SubexGroupState { +		nil, +		next, +	} +	state.first = ast.content.compileWith(state) +	return state +} +func (ast SubexASTMaximise) String() string { +	return fmt.Sprintf("(%v)*", ast.content) +} + +type SubexASTMinimise struct { +	content SubexAST +} +func (ast SubexASTMinimise) compileWith(next SubexState) SubexState { +	state := &SubexGroupState { +		next, +		nil, +	} +	state.second = ast.content.compileWith(state) +	return state +} +func (ast SubexASTMinimise) String() string { +	return fmt.Sprintf("(%v)-", ast.content) +} + +type SubexASTRepeat struct { +	content SubexAST +	min, max int +} +func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { +	for i := ast.min; i < ast.max; i += 1 { +		next = SubexGroupState { +			ast.content.compileWith(next), +			next, +		} +	} +	for i := 0; i < ast.min; i += 1 { +		next = ast.content.compileWith(next) +	} +	return next +} + +type SubexASTCopyRune rune +func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { +	return SubexCopyRuneState{ +		rune: rune(ast), +		next: next, +	} +} + +type SubexASTCopyAny struct {} +func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState { +	return SubexCopyAnyState{next} +} +func (ast SubexASTCopyAny) String() string { +	return "." +} + +type SubexASTOutput struct { +	replacement []TransducerOutput +} +func (ast SubexASTOutput) compileWith(next SubexState) SubexState { +	return SubexOutputState{ +		content: ast.replacement, +		next: next, +	} +} diff --git a/main/subexstate.go b/main/subexstate.go new file mode 100644 index 0000000..cc697f0 --- /dev/null +++ b/main/subexstate.go @@ -0,0 +1,123 @@ +package main + +import ( +	"strings" +) + +type SubexState interface { +	eat(store Store, char rune) []SubexBranch +	accepting(store Store) []string +} + +type SubexGroupState struct { +	first, second SubexState +} +func (state SubexGroupState) eat(store Store, char rune) []SubexBranch { +	otherStore := store.clone() +	return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) +} +func (state SubexGroupState) accepting(store Store) []string { +	return append(state.first.accepting(store), state.second.accepting(store)...) +} + +type SubexStoreState struct { +	match RegexState +	slot rune +	next SubexState +	input string +} +func (state SubexStoreState) eat(store Store, char rune) []SubexBranch { +	var nextStates []SubexBranch +	if state.match.accepting() { +		store[state.slot] = state.input +		nextStates = state.next.eat(store, char) +	} +	nextRegexStates := state.match.eat(char) +	for _, regexState := range nextRegexStates { +		nextStates = append(nextStates, SubexBranch { +			state: SubexStoreState { +				match: regexState, +				slot: state.slot, +				next: state.next, +				input: state.input + string(char), +			}, +			output: "", +			store: store.clone(), +		}) +	} +	return nextStates +} +func (state SubexStoreState) accepting(store Store) []string { +	if state.match.accepting() { +		return state.next.accepting(store) +	} +	return nil +} + +type SubexOutputState struct { +	content []TransducerOutput +	next SubexState +} +func (state SubexOutputState) build(store Store) string { +	var builder strings.Builder +	for _, part := range state.content { +		builder.WriteString(part.build(store)) +	} +	return builder.String() +} +func (state SubexOutputState) eat(store Store, char rune) []SubexBranch { +	content := state.build(store) +	nextStates := state.next.eat(store, char) +	for i := range nextStates { +		nextStates[i].output = content + nextStates[i].output +	} +	return nextStates +} +func (state SubexOutputState) accepting(store Store) []string { +	content := state.build(store) +	outputs := state.next.accepting(store) +	for i := range outputs { +		outputs[i] = content + outputs[i] +	} +	return outputs +} + +type SubexNoneState struct {} +func (state SubexNoneState) eat(store Store, char rune) []SubexBranch { +	return nil +} +func (state SubexNoneState) accepting(store Store) []string { +	return []string{""} +} + +type SubexCopyRuneState struct { +	rune rune +	next SubexState +} +func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch { +	if char == state.rune { +		return []SubexBranch{{ +			state: state.next, +			output: string(char), +			store: store, +		}} +	} +	return nil +} +func (state SubexCopyRuneState) accepting(store Store) []string { +	return nil +} + +type SubexCopyAnyState struct { +	next SubexState +} +func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch { +	return []SubexBranch{{ +		state: state.next, +		output: string(char), +		store: store, +	}} +} +func (state SubexCopyAnyState) accepting(store Store) []string { +	return nil +} | 
