diff options
Diffstat (limited to 'subex')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | subex/main.go | 1 |
| -rw-r--r-- | subex/main_test.go | 56 |
| -rw-r--r-- | subex/parse.go | 175 |
| -rw-r--r-- | subex/subexast.go | 145 |
4 files changed, 278 insertions, 99 deletions
| diff --git a/subex/main.go b/subex/main.go index 86a8d41..f8d9093 100644 --- a/subex/main.go +++ b/subex/main.go @@ -276,7 +276,6 @@ func processInput(states []SubexEatBranch, input walk.Edible, nesting int) []Sub  	newStates := make([]SubexEatBranch, 0, 2)  	for _, state := range states { -		// TODO: What if nesting is changed by an epsilon state?  		if state.aux.nesting == nesting {  			newStates = addStates(newStates, state.eat(input))  		} else if state.aux.nesting < nesting { diff --git a/subex/main_test.go b/subex/main_test.go index 78a62c4..d7424b3 100644 --- a/subex/main_test.go +++ b/subex/main_test.go @@ -61,6 +61,15 @@ func TestSubexMain(t *testing.T) {  			},  		},  		{ +			subex: `~(.)~`, +			input: []walk.Value { +				walk.StringValue("a"), +			}, +			expected: []walk.Value { +				walk.StringValue("a"), +			}, +		}, +		{  			subex: `~(.$_(.{-0}))~`,  			input: []walk.Value {  				walk.StringValue("hello"), @@ -182,9 +191,54 @@ func TestSubexMain(t *testing.T) {  				},  			},  		}, +		{ +			subex: "-(`0`.)@", +			input: []walk.Value { +				walk.NumberValue(4), +			}, +			expected: []walk.Value { +				walk.ArrayValue { +					{ +						Index: 0, +						Value: walk.NumberValue(4), +					}, +				}, +			}, +		}, +		{ +			subex: `@(.$_~(.{-0})-{-0})~`, +			input: []walk.Value { +				walk.ArrayValue { +					{ +						Index: 0, +						Value: walk.StringValue("ab"), +					}, +					{ +						Index: 1, +						Value: walk.StringValue("cd"), +					}, +					{ +						Index: 2, +						Value: walk.StringValue("efg"), +					}, +					{ +						Index: 3, +						Value: walk.StringValue(""), +					}, +					{ +						Index: 4, +						Value: walk.StringValue("hijklm"), +					}, +				}, +			}, +			expected: []walk.Value { +				walk.StringValue("abcdefghijklm"), +			}, +		},  	} -	for _, test := range tests { +	for i, test := range tests { +		t.Logf("Running test: %d", i)  		lexer := NewStringRuneReader(test.subex)  		ast := Parse(lexer)  		transducer := CompileTransducer(ast) 
diff --git a/subex/parse.go b/subex/parse.go index 9602a4b..f1565f5 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -8,10 +8,44 @@ import (  type Type int  const ( -	ValueType Type = iota +	AnyType Type = iota +	ValueType  	RuneType  ) +func resolveTypes(t1 Type, t2 Type) Type { +	if t1 == AnyType { +		return t2 +	} + +	if t2 == AnyType { +		return t1 +	} + +	if t1 == t2 { +		return t1 +	} + +	panic("Types don't match in parser") +} + +type Structure int +const ( +	NoneStructure Structure = iota +	StringStructure +	ArrayStructure +) +func (s Structure) innerType() Type { +	switch s { +	case StringStructure: +		return RuneType +	case ArrayStructure: +		return ValueType +	default: +		panic("Invalid structure") +	} +} +  type RuneReader interface {  	Next() rune  	Rewind() @@ -270,48 +304,94 @@ func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) {  // 	return parts  // } -func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST { -	var lhs SubexAST +func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { +	if !accept(l, "(") { +		panic("Missing ( after destructure start") +	} + +	var innerInType Type +	var expectedInType Type +	switch destructure { +	case NoneStructure: +		innerInType = inType +		expectedInType = inType +	case StringStructure: +		innerInType = RuneType +		expectedInType = ValueType +	case ArrayStructure: +		innerInType = ValueType +		expectedInType = ValueType +	default: +		panic("Invalid structure") +	} + +	resolveTypes(inType, expectedInType) + +	lhs, innerOutType := parseSubex(l, 0, innerInType) +	if !accept(l, ")") { +		panic("Missing matching )") +	} + +	var structure Structure +	var expectedInnerOutType Type +	r := l.Next() +	switch r { +	case '-': +		structure = NoneStructure +		expectedInnerOutType = innerOutType +	case '~': +		structure = StringStructure +		expectedInnerOutType = RuneType +	case '@': +		structure = ArrayStructure +		expectedInnerOutType = 
ValueType +	default: +		panic("Missing matching destructure") +	} + +	innerOutType = resolveTypes(innerOutType, expectedInnerOutType) + +	switch structure { +	case NoneStructure: +		outType = innerOutType +	case StringStructure: +		outType = ValueType +	case ArrayStructure: +		outType = ValueType +	} + +	lhs = SubexASTDestructure { +		Destructure: destructure, +		Structure: structure, +		Content: lhs, +	} + +	return lhs, outType +} + +func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) {  	r := l.Next()  	switch r {  		case eof: -			return nil +			return nil, inType  		case '(': -			lhs = parseSubex(l, 0, inType, outType) +			lhs, outType = parseSubex(l, 0, inType)  			if !accept(l, ")") {  				panic("Missing matching )")  			} +		case '-': +			lhs, outType = parseDestructure(l, NoneStructure, inType)  		case '~': -			if !accept(l, "(") { -				panic("Missing ( after ~") -			} -			lhs = parseSubex(l, 0, RuneType, RuneType) -			if !accept(l, ")") { -				panic("Missing matching )") -			} -			if !accept(l, "~") { -				panic("Missing matching ~") -			} -			lhs = SubexASTEnterString {lhs} +			lhs, outType = parseDestructure(l, StringStructure, inType)  		case '@': -			if !accept(l, "(") { -				panic("Missing ( after @") -			} -			lhs = parseSubex(l, 0, ValueType, ValueType) -			if !accept(l, ")") { -				panic("Missing matching )") -			} -			if !accept(l, "@") { -				panic("Missing matching ~") -			} -			lhs = SubexASTEnterArray {lhs} +			lhs, outType = parseDestructure(l, ArrayStructure, inType)  		// TODO  		// case '[':  		// 	rangeParts := parseRangeSubex(l)  		// 	lhs = SubexASTRange {rangeParts} -		case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$': +		case ')', ']', '"', '|', ';', '{', '+', '*', '/', '!', '=', '$':  			l.Rewind() -			return SubexASTEmpty{} +			return SubexASTEmpty{}, inType  		// case '=':  		// 	replacement := parseReplacement(l)  		// 	lhs = SubexASTOutput{replacement} @@ -327,19 +407,20 @@ func 
parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  		// 	)  		// 	lhs = SubexASTOutput {replacement}  		case '.': -			if inType != outType { -				panic("Copying value changes type!") -			} +			outType = inType  			if inType == RuneType {  				lhs = SubexASTCopyAnyRune{}  			} else {  				lhs = SubexASTCopyAnyValue{}  			}  		case '?': +			outType = inType  			lhs = SubexASTCopyBool{}  		case '%': +			outType = inType  			lhs = SubexASTCopyNumber{}  		case '`': +			outType = inType  			lhs = SubexASTOutputValues {parseValueReplacement(l)}  		// TODO  		// case '_': @@ -351,9 +432,7 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  		// case '"':  		// 	lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}  		default: -			if inType != outType { -				panic("inType and outType don't match in copy") -			} +			outType = inType  			if inType == RuneType {  				lhs = SubexASTCopyRune {r}  			} else { @@ -367,8 +446,9 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  	}  	loop: for {  		if minPower <= 20 { -			next := parseSubex(l, 21, inType, outType) +			next, outType2 := parseSubex(l, 21, inType)  			if next != nil && (next != SubexASTEmpty{}) { +				outType = resolveTypes(outType, outType2)  				lhs = SubexASTConcat{lhs, next}  				continue loop  			} @@ -382,14 +462,18 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  				}  			case r == '+' && minPower <= 4:  				lhs = SubexASTSum {lhs} +				resolveTypes(inType, ValueType) +				outType = resolveTypes(outType, ValueType)  			case r == '*' && minPower <= 4:  				lhs = SubexASTProduct {lhs} -			case r == '-' && minPower <= 4: -				lhs = SubexASTNegate {lhs} +				resolveTypes(inType, ValueType) +				outType = resolveTypes(outType, ValueType)  			// case r == '/' && minPower <= 4:  				// lhs = SubexASTReciprocal {lhs}  			case r == '!' 
&& minPower <= 4:  				lhs = SubexASTNot {lhs} +				resolveTypes(inType, ValueType) +				outType = resolveTypes(outType, ValueType)  			// case r == '=' && minPower <= 4:  				// lhs = SubexASTEqual {lhs}  			case r == '$' && minPower <= 4: @@ -398,15 +482,21 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  					panic("Missing slot character")  				}  				if slot == '_' { -					lhs = SubexASTDiscard {lhs} +					lhs = SubexASTDiscard { +						Content: lhs, +						InnerOutType: outType, +					}  				} else { +					resolveTypes(inType, ValueType)  					lhs = SubexASTStoreValues {  						Match: lhs,  						Slot: slot,  					}  				} +				outType = AnyType  			case r == '|' && minPower <= 8: -				rhs := parseSubex(l, 9, inType, outType) +				rhs, outType2 := parseSubex(l, 9, inType) +				outType = resolveTypes(outType, outType2)  				if rhs == nil {  					panic("Missing subex after |")  				} @@ -425,11 +515,12 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST  				break loop  		}  	} -	return lhs +	return lhs, outType  }  func Parse(l RuneReader) SubexAST { -	ast := parseSubex(l, 0, ValueType, ValueType) +	ast, outType := parseSubex(l, 0, ValueType) +	outType = resolveTypes(outType, ValueType)  	if ast == nil {  		return SubexASTEmpty{}  	} diff --git a/subex/subexast.go b/subex/subexast.go index cef853b..7070baf 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -132,9 +132,6 @@ type SubexASTRepeat struct {  	Acceptable []ConvexRange  }  func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { -	if inType != outType { -		panic("Invalid types") -	}  	var state SubexState = &SubexDeadState{}  	for _, convex := range ast.Acceptable {  		state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap, inType, outType)} @@ -223,7 +220,8 @@ func (ast SubexASTCopyNumber) String() string {  type SubexASTCopyAnyValue struct {}  func (ast 
SubexASTCopyAnyValue) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState {  	if inType != ValueType || outType != ValueType { -		panic("Invalid types for SubexASTNot") +		fmt.Printf("%v, %v", inType, outType) +		panic("Invalid types for SubexASTCopyAnyValue")  	}  	return &SubexCopyState {  		next: next, @@ -446,9 +444,10 @@ func (ast SubexASTEmpty) String() string {  // Discards the output from the content subex  type SubexASTDiscard struct {  	Content SubexAST +	InnerOutType Type  }  func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { -	newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, outType) +	newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, ast.InnerOutType)  	if inType == ValueType {  		return &SubexCaptureBeginState {  			next: newNext, @@ -463,65 +462,101 @@ func (ast SubexASTDiscard) String() string {  	return fmt.Sprintf("(%v)$_", ast.Content)  } -// Go into an array, pass the content each of the values in the array to eat and then leave the array -type SubexASTEnterArray struct { +type SubexASTDestructure struct { +	Destructure Structure +	Structure Structure  	Content SubexAST  } -func (ast SubexASTEnterArray) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { -	if inType != ValueType || outType != ValueType { -		panic("Invalid types for SubexASTEnterArray") +func (ast SubexASTDestructure) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { +	var innerOutType Type +	var construct SubexState +	switch ast.Structure { +	case NoneStructure: +		innerOutType = outType +		construct = next +	case StringStructure: +		innerOutType = RuneType +		construct = &SubexConstructStringState { +			next: next, +		} +	case ArrayStructure: +		innerOutType = ValueType +		construct = &SubexConstructArrayState { +			next: next, +		}  	} -	return 
&SubexCaptureBeginState { -		next: &SubexCopyState { -			filter: anyArrayFilter{}, -			next: &SubexDiscardState { -				next: &SubexIncrementNestState { -					next: &SubexCaptureBeginState { -						next: ast.Content.compileWith( -							&SubexDiscardTerminalState { -								terminal: walk.ArrayEnd, -								next: &SubexDecrementNestState { -									next: &SubexConstructArrayState {next: next}, -								}, -							}, -							slotMap, -							ValueType, -							ValueType, -						), -					}, -				}, + +	var innerInType Type +	var destructFooter SubexState +	switch ast.Destructure { +	case NoneStructure: +		innerInType = inType +		destructFooter = construct +	case StringStructure: +		innerInType = RuneType +		destructFooter = &SubexDiscardTerminalState { +			terminal: walk.StringEnd, +			next: &SubexDecrementNestState { +				next: construct, +			}, +		} +	case ArrayStructure: +		innerInType = ValueType +		destructFooter = &SubexDiscardTerminalState { +			terminal: walk.ArrayEnd, +			next: &SubexDecrementNestState { +				next: construct,  			}, -		}, +		}  	} -} -type SubexASTEnterString struct { -	Content SubexAST -} -func (ast SubexASTEnterString) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { -	if inType != ValueType || outType != ValueType { -		panic("Invalid types for SubexASTEnterString") +	inner := ast.Content.compileWith( +		destructFooter, +		slotMap, +		innerInType, +		innerOutType, +	) + +	var beginConstruct SubexState +	switch ast.Structure { +	case NoneStructure: +		beginConstruct = inner +	case StringStructure: +		beginConstruct = &SubexCaptureRunesBeginState { +			next: inner, +		} +	case ArrayStructure: +		beginConstruct = &SubexCaptureBeginState { +			next: inner, +		}  	} -	return &SubexCaptureBeginState { -		next: &SubexCopyState { -			filter: anyStringFilter{}, -			next: &SubexDiscardState { -				next: &SubexIncrementNestState { -					next: &SubexCaptureRunesBeginState { -						next: ast.Content.compileWith( -		
					&SubexDiscardTerminalState { -								terminal: walk.StringEnd, -								next: &SubexDecrementNestState { -									next: &SubexConstructStringState {next: next}, -								}, -							}, -							slotMap, -							RuneType, -							RuneType, -						), + +	switch ast.Destructure { +	case NoneStructure: +		return beginConstruct +	case StringStructure: +		return &SubexCaptureBeginState { +			next: &SubexCopyState { +				filter: anyStringFilter{}, +				next: &SubexDiscardState { +					next: &SubexIncrementNestState { +						next: beginConstruct, +					}, +				}, +			}, +		} +	case ArrayStructure: +		return &SubexCaptureBeginState { +			next: &SubexCopyState { +				filter: anyArrayFilter{}, +				next: &SubexDiscardState { +					next: &SubexIncrementNestState { +						next: beginConstruct,  					},  				},  			}, -		}, +		} +	default: +		panic("Invalid destructure in ast")  	}  } | 
