diff options
Diffstat (limited to 'subex/parse.go')
| -rw-r--r-- | subex/parse.go | 661 | 
1 files changed, 538 insertions, 123 deletions
| diff --git a/subex/parse.go b/subex/parse.go index e91008a..179cc01 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,7 +1,6 @@  package subex  import ( -	"fmt"  	"main/walk"  	"strconv"  	"strings" @@ -64,6 +63,7 @@ const (  type RuneReader interface {  	Next() rune  	Rewind() +	RewindRune(r rune)  }  func accept(l RuneReader, chars string) bool { @@ -122,7 +122,6 @@ func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) {  				panic("Invalid literal")  			}  		default: -			fmt.Printf("%c\n", r)  			panic("Invalid literal")  	}  } @@ -145,6 +144,72 @@ func parseInt(l RuneReader) (output int) {  	return output  } +func parseNumberFilter(l RuneReader, minPower int) SubexASTNumberFilter { +	var lhs SubexASTNumberFilter +	r := l.Next() +	switch r { +	case eof: +		panic("Missing matching ]") +	case 'c': +		count := parseInt(l) +		lhs = SubexASTNumberFilterCount {count} +	case 'p': +		var subset NumberSubset +		if l.Next() == 'i' { +			subset = NumberSubsetPositiveInteger +		} else { +			subset = NumberSubsetPositiveReal +			l.Rewind() +		} +		lhs = SubexASTNumberFilterSubset { +			subset: subset, +		} +	default: +		if !isNumericRune(r) { +			panic("Invalid character in numeric []") +		} + +		var builder strings.Builder +		builder.WriteRune(r) +		for { +			r := l.Next() +			if !isNumericRune(r) { +				l.Rewind() +				break +			} +			builder.WriteRune(r) +		} +		numberString := builder.String() +		number, err := strconv.ParseFloat(numberString, 64) +		if err != nil { +			panic("Invalid number literal") +		} + +		lhs = SubexASTNumberFilterLiteral {number} +	} + +	loop: for { +		r := l.Next() +		switch { +		case r == '+' && minPower <= 10: +			lhs = SubexASTNumberFilterAdd { +				lhs: lhs, +				rhs: parseNumberFilter(l, 11), +			} +		case r == '*' && minPower <= 20: +			lhs = SubexASTNumberFilterMultiply { +				lhs: lhs, +				rhs: parseNumberFilter(l, 21), +			} +		default: +			l.Rewind() +			break loop +		} +	} + +	return lhs +} +  // Having just read {, read in and parse the range contents  func parseRepeatRange(l RuneReader) (output []ConvexRange) {  	loop: for { @@ -189,7 +254,7 @@ func parseRepeatRange(l RuneReader) (output []ConvexRange) {  	return output  } -func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { +func parseValueReplacementOLD(l RuneReader, end rune) (output SubexAST) {  	output = SubexASTEmpty{}  	// TODO escaping  	// TODO add arrays, maps and strings @@ -222,7 +287,7 @@ func parseValueReplacement(l RuneReader, end rune) (output SubexAST) {  				Second: SubexASTDestructure {  					Destructure: NoneStructure,  					Structure: MapStructure, -					Content: parseValueReplacement(l, ')'), +					Content: parseValueReplacementOLD(l, ')'),  				},  			}  			if !accept(l, "#") { @@ -264,7 +329,7 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) {  			panic("Missing closing `")  		case end:  			break loop -		case '$': +		case '<':  			slot := l.Next()  			if slot == eof {  				panic("Missing slot character") @@ -287,6 +352,126 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) {  	return output  } +func parseValueReplacement(l RuneReader, end rune, minPower int) SubexAST { +	// TODO: escaping probably +	var lhs SubexAST +	r := l.Next() +	switch r { +	case eof: +		panic("Missing closing `") +	case end: +		l.Rewind() +		return SubexASTEmpty{} +	case 'n': +		if !accept(l, "u") { +			panic("Expected null") +		} +		if !accept(l, "l") { +			panic("Expected null") +		} +		if !accept(l, "l") { +			panic("Expected null") +		} +		lhs = SubexASTOutputValueLiteral { +			literal: walk.NullValue{}, +		} +	// TODO: everything except numbers, strings, maps, and null +	case '"': +		lhs = SubexASTDestructure { +			Destructure: NoneStructure, +			Structure: StringStructure, +			Content: parseRuneReplacement(l, '"'), +		} +	case '#': +		if !accept(l, "(") { +			panic("Missing ( after #") +		} +		lhs = SubexASTDestructure { +			Destructure: NoneStructure, +			Structure: MapStructure, +			Content: parseValueReplacement(l, ')', 0), +		} +		if !accept(l, ")") { +			panic("Missing closing )") +		} +		if !accept(l, "#") { +			panic("Missing # after )") +		} +	case '<': +		slot := l.Next() +		if slot == eof { +			panic("Missing slot character") +		} +		lhs = SubexASTOutputValueLoad { +			slot: slot, +		} +	default: +		if !isNumericRune(r) { +			panic("Invalid character in numeric") +		} + +		var builder strings.Builder +		builder.WriteRune(r) +		for { +			r := l.Next() +			if !isNumericRune(r) { +				l.Rewind() +				break +			} +			builder.WriteRune(r) +		} +		numberString := builder.String() +		number, err := strconv.ParseFloat(numberString, 64) +		if err != nil { +			panic("Invalid number literal") +		} + +		lhs = SubexASTOutputValueLiteral { +			literal: walk.NumberValue(number), +		} +	} + +	loop: for { +		r := l.Next() +		switch { +		case r == eof: +			panic("Missing closing `") +		case r == '+' && minPower <= 10: +			lhs = SubexASTBinop { +				op: binopAdd, +				lhs: lhs, +				rhs: parseValueReplacement(l, end, 11), +			} +		case r == '*' && minPower <= 20: +			lhs = SubexASTBinop { +				op: binopMultiply, +				lhs: lhs, +				rhs: parseValueReplacement(l, end, 21), +			} +		case r == '/' && minPower <= 20: +			lhs = SubexASTBinop { +				op: binopDivide, +				lhs: lhs, +				rhs: parseValueReplacement(l, end, 21), +			} +		case r == end: +			l.Rewind() +			break loop +		case minPower <= 2: +			l.Rewind() +			lhs = SubexASTConcat { +				First: lhs, +				Second: parseValueReplacement(l, end, 3), +			} +		default: +			l.Rewind() +			break loop +		} +	} + +	return lhs +} +  // Parse the contents of a range subex [] into a map  // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD {  // 	// TODO escaping @@ -471,165 +656,395 @@ func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType  	start:  	r := l.Next()  	switch r { -		case eof: -			return nil, inType -		case '(': -			lhs, outType = parseSubex(l, 0, inType) -			if !accept(l, ")") { -				panic("Missing matching )") -			} -		case '-': -			lhs, outType = parseDestructure(l, NoneStructure, inType) -		case '~': -			lhs, outType = parseDestructure(l, StringStructure, inType) -		case '@': -			lhs, outType = parseDestructure(l, ArrayStructure, inType) -		case ':': -			lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) -		case '#': -			lhs, outType = parseDestructure(l, MapStructure, inType) -		case '"': -			if inType == ValueType { -				var innerOutType Type -				lhs, innerOutType = parseSubex(l, 0, RuneType) -				if !accept(l, "\"") { -					panic("Missing matching \"") -				} -				resolveTypes(innerOutType, RuneType) -				lhs = SubexASTDestructure { -					Destructure: StringStructure, -					Structure: StringStructure, -					Content: lhs, -				} -				outType = ValueType -			} else { -				l.Rewind() -				return SubexASTEmpty{}, inType +	case eof: +		return nil, inType +	case '(': +		lhs, outType = parseSubex(l, 0, inType) +		if !accept(l, ")") { +			panic("Missing matching )") +		} +	case '-': +		lhs, outType = parseDestructure(l, NoneStructure, inType) +	case '~': +		lhs, outType = parseDestructure(l, StringStructure, inType) +	case '@': +		lhs, outType = parseDestructure(l, ArrayStructure, inType) +	case ':': +		lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) +	case '#': +		lhs, outType = parseDestructure(l, MapStructure, inType) +	case '"': +		switch inType { +		case ValueType: +			var innerOutType Type +			lhs, innerOutType = parseSubex(l, 0, RuneType) +			if !accept(l, "\"") { +				panic("Missing matching \"") +			} +			resolveTypes(innerOutType, RuneType) +			lhs = SubexASTDestructure { +				Destructure: StringStructure, +				Structure: StringStructure, +				Content: lhs,  			} -		// TODO -		// case '[': -		// 	rangeParts := parseRangeSubex(l) -		// 	lhs = SubexASTRange {rangeParts} -		case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': +			outType = ValueType +		// RuneType +		default:  			l.Rewind()  			return SubexASTEmpty{}, inType -		case '.': -			outType = inType -			if inType == RuneType { -				lhs = SubexASTCopyAnyRune{} -			} else { -				lhs = SubexASTCopyAnyValue{} -			} -		case ',': +		} +	case '<': +		slot := l.Next() +		switch slot { +		case eof: +			panic("Missing slot") +		case '>': +			panic("Parsing error. Tried to parse <> as a subex with nothing before it") +		default:  			switch inType {  			case ValueType: -				outType = inType -				lhs = SubexASTCopyAnySimpleValue{} +				lhs = SubexASTOutputValueLoad { +					slot: slot, +				}  			case RuneType: -				outType = inType -				lhs = SubexASTCopyRune{','} +				lhs = SubexASTOutputRuneLoad { +					slot: slot, +				}  			default:  				panic("Invalid inType")  			} -		case '?': +		} +	case '[': +		switch inType { +		case ValueType: +			lhs = SubexASTCopyNumberFilter { +				filter: parseNumberFilter(l, 0), +			} +			if !accept(l, "]") { +				panic("Missing matching ]") +			} +		default: +			// TODO: other types +			panic("[] is only valid for values currently") +		} +	case ')', ']', '|', '{', '+', '*': +		l.Rewind() +		return SubexASTEmpty{}, inType +	case '.': +		outType = inType +		if inType == RuneType { +			lhs = SubexASTCopyAnyRune{} +		} else { +			lhs = SubexASTCopyAnyValue{} +		} +	case ',': +		switch inType { +		case ValueType:  			outType = inType -			lhs = SubexASTCopyBool{} -		case '%': +			lhs = SubexASTCopyAnySimpleValue{} +		case RuneType:  			outType = inType -			lhs = SubexASTCopyNumber{} -		case '`': +			lhs = SubexASTCopyRune{','} +		default: +			panic("Invalid inType") +		} +	case 'r': +		switch inType { +		case ValueType:  			outType = inType -			switch inType { -			case ValueType: -				lhs = parseValueReplacement(l, '`') -			case RuneType: -				lhs = parseRuneReplacement(l, '`') -			default: -				panic("Invalid inType") +			lhs = SubexASTCopyNumberFilter { +				filter: SubexASTNumberFilterSubset { +					subset: NumberSubsetReal, +				},  			} -		case ' ': -			if inType == RuneType { -				outType = RuneType -				lhs = SubexASTCopyRune {' '} -			} else { -				goto start +		case RuneType: +			outType = inType +			lhs = SubexASTCopyRune {'r'} +		default: +			panic("Invalid inType") +		} +	case '?': +		outType = inType +		lhs = SubexASTCopyBool{} +	case '`': +		outType = inType +		switch inType { +		case ValueType: +			lhs = parseValueReplacement(l, '`', 0) +			if !accept(l, "`") { +				panic("Missing closing `")  			} +		case RuneType: +			lhs = parseRuneReplacement(l, '`')  		default: -			outType = inType -			if inType == RuneType { -				lhs = SubexASTCopyRune {r} -			} else { -				l.Rewind() -				scalar, ok := parseScalarLiteral(l) -				if !ok { -					panic("Invalid subex") -				} -				lhs = SubexASTCopyScalar {scalar} +			panic("Invalid inType") +		} +	case ' ': +		switch inType { +		case RuneType: +			outType = RuneType +			lhs = SubexASTCopyRune {' '} +		case ValueType: +			goto start +		} +	default: +		outType = inType +		switch inType { +		case RuneType: +			lhs = SubexASTCopyRune {r} +		// ValueType, NumberType +		case ValueType: +			l.Rewind() +			scalar, ok := parseScalarLiteral(l) +			if !ok { +				panic("Invalid subex")  			} +			lhs = SubexASTCopyScalar {scalar} +		}  	}  	loop: for { -		if minPower <= 20 { -			next, outType2 := parseSubex(l, 21, inType) -			// TODO: next might legitimately be SubexASTEmpty, e.g. `` -			if next != nil && (next != SubexASTEmpty{}) { -				outType = resolveTypes(outType, outType2) -				lhs = SubexASTConcat{lhs, next} -				continue loop -			} -		}  		r := l.Next()  		switch { -			case r == '{' && minPower <= 4: -				lhs = SubexASTRepeat { +		case r == eof: +			break loop +		case r == '{' && minPower <= 10: +			lhs = SubexASTRepeat { +				Content: lhs, +				Acceptable: parseRepeatRange(l), +			} +		case r == '+' && minPower <= 10: +			lhs = SubexASTRepeat { +				Content: lhs, +				Acceptable: []ConvexRange {{ +					Start: -1, +					End: 1, +				}}, +			} +		case r == '*' && minPower <= 10: +			lhs = SubexASTRepeat { +				Content: lhs, +				Acceptable: []ConvexRange {{ +					Start: -1, +					End: 0, +				}}, +			} +		case r == '_' && minPower <= 10: +			switch inType { +			case ValueType: +				lhs = SubexASTDiscard {  					Content: lhs, -					Acceptable: parseRepeatRange(l), +					InnerOutType: outType,  				} -			case r == '+' && minPower <= 4: -				lhs = SubexASTSum {lhs} -				resolveTypes(inType, ValueType) -				outType = resolveTypes(outType, ValueType) -			case r == '*' && minPower <= 4: -				lhs = SubexASTProduct {lhs} -				resolveTypes(inType, ValueType) -				outType = resolveTypes(outType, ValueType) -			case r == '!' && minPower <= 4: -				lhs = SubexASTNot {lhs} -				resolveTypes(inType, ValueType) -				outType = resolveTypes(outType, ValueType) -			case r == '$' && minPower <= 4: +				outType = AnyType +			case RuneType: +				// Just a concat +				lhs = SubexASTConcat { +					lhs, +					SubexASTCopyRune { +						rune: '_', +					}, +				} +				outType = AnyType +			default: +				panic("Invalid inType") +			} +		case r == '%' && minPower <= 10: +			slot := l.Next() +			switch slot { +			case eof: +				panic("Missing slot character") +			case '<', '>': +				panic("Invalid character after %") +			case '_': +				panic("Cannot load from _") +			default: +				switch inType { +				case ValueType: +					lhs = SubexASTConcat { +						First: SubexASTStoreValues { +							Match: lhs, +							Slot: slot, +						}, +						Second: SubexASTOutputValueLoad { +							slot: slot, +						}, +					} +				case RuneType: +					lhs = SubexASTConcat { +						First: SubexASTStoreRunes { +							Match: lhs, +							Slot: slot, +						}, +						Second: SubexASTOutputRuneLoad { +							slot: slot, +						}, +					} +				default: +					panic("Invalid inType") +				} +			} +		case r == '>' && minPower <= 10: +			slot := l.Next() +			switch slot { +			case eof: +				panic("Missing slot character") +			case '>':  				slot := l.Next() -				if slot == eof { +				switch slot { +				case eof:  					panic("Missing slot character") -				} -				if slot == '_' { +				case '_':  					lhs = SubexASTDiscard {  						Content: lhs,  						InnerOutType: outType,  					} -				} else { -					if inType == ValueType { -						lhs = SubexASTStoreValues { +					outType = AnyType +				default: +					switch inType { +					case ValueType: +						lhs = SubexASTAppendStoreValues {  							Match: lhs,  							Slot: slot,  						} -					} else { -						lhs = SubexASTStoreRunes { +					case RuneType: +						lhs = SubexASTAppendStoreRunes {  							Match: lhs,  							Slot: slot,  						} +					default: +						panic("Invalid inType")  					} +					outType = AnyType +				} +			case '<': +				slot := l.Next() +				switch slot { +				case eof: +					panic("Missing slot character") +				case '_': +					panic("Cannot load from _ slot") +				default: +					switch inType { +					case ValueType: +						lhs = SubexASTConcat { +							First: SubexASTStoreValues { +								Match: lhs, +								Slot: slot, +							}, +							Second: SubexASTOutputValueLoad { +								slot: slot, +							}, +						} +					case RuneType: +						lhs = SubexASTConcat { +							First: SubexASTStoreRunes { +								Match: lhs, +								Slot: slot, +							}, +							Second: SubexASTOutputRuneLoad { +								slot: slot, +							}, +						} +					default: +						panic("Invalid inType") +					} +					outType = inType +				} +			case '_': +				lhs = SubexASTDiscard { +					Content: lhs, +					InnerOutType: outType,  				}  				outType = AnyType -			case r == '|' && minPower <= 8: -				rhs, outType2 := parseSubex(l, 9, inType) -				outType = resolveTypes(outType, outType2) -				if rhs == nil { -					panic("Missing subex after |") +			default: +				switch inType { +				case ValueType: +					lhs = SubexASTStoreValues { +						Match: lhs, +						Slot: slot, +					} +				case RuneType: +					lhs = SubexASTStoreRunes { +						Match: lhs, +						Slot: slot, +					} +				default: +					panic("Invalid type") +				} +				outType = AnyType +			} +		case r == '<' && minPower <= 6: +			slot := l.Next() +			switch slot { +			case eof: +				panic("Missing slot character") +			case '_': +				panic("Cannot load from _ slot") +			case '>': +				slot := l.Next() +				switch slot { +				case eof: +					panic("Missing slot character") +				case '_': +					panic("Cannot load from _ slot") +				default: +					switch inType { +					case ValueType: +						lhs = SubexASTConcat { +							SubexASTOutputValueLoad { +								slot: slot, +							}, +							SubexASTStoreValues { +								Match: lhs, +								Slot: slot, +							}, +						} +					case RuneType: +						lhs = SubexASTConcat { +							SubexASTOutputRuneLoad { +								slot: slot, +							}, +							SubexASTStoreRunes { +								Match: lhs, +								Slot: slot, +							}, +						} +					default: +						panic("Invalid inType") +					}  				} -				lhs = SubexASTOr{lhs, rhs}  			default: +				// This is just a concat  				l.Rewind() +				l.RewindRune('<') +				next, outType2 := parseSubex(l, 7, inType) +				// TODO: next might legitimately be SubexASTEmpty, e.g. `` +				if next != nil && (next != SubexASTEmpty{}) { +					outType = resolveTypes(outType, outType2) +					lhs = SubexASTConcat{lhs, next} +					continue loop +				} +			} +		case r == '|' && minPower <= 2: +			rhs, outType2 := parseSubex(l, 3, inType) +			outType = resolveTypes(outType, outType2) +			if rhs == nil { +				panic("Missing subex after |") +			} +			lhs = SubexASTOr{lhs, rhs} +		case minPower <= 6: +			l.Rewind() +			next, outType2 := parseSubex(l, 7, inType) +			// TODO: next might legitimately be SubexASTEmpty, e.g. `` +			if next != nil && (next != SubexASTEmpty{}) { +				outType = resolveTypes(outType, outType2) +				lhs = SubexASTConcat{lhs, next} +			} else {  				break loop +			} +		default: +			l.Rewind() +			break loop  		}  	}  	return lhs, outType | 
