diff options
| -rw-r--r-- | subex/parse.go | 76 | ||||
| -rw-r--r-- | subex/subexast.go | 78 | ||||
| -rw-r--r-- | subex/subexstate.go | 9 | 
3 files changed, 151 insertions, 12 deletions
| diff --git a/subex/parse.go b/subex/parse.go index f2c77bc..24ae082 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -29,6 +29,68 @@ func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {  	}  } +func charIsDigit(c rune) bool { +	return '0' <= c && c <= '9' +} + +// Parse a positive integer, reads digits 0-9 and stops at the first non-digit +func parseInt(l *RuneReader) (output int) { +	for { +		char := l.next() +		if charIsDigit(char) { +			output = output * 10 + int(char - '0') +		} else { +			break +		} +	} +	l.rewind() +	return output +} + +// Having just read {, read in and parse the range contents +func parseRepeatRange(l *RuneReader) (output []ConvexRange) { +	loop: for { +		var start, end int +		char := l.next() +		l.rewind() +		if char == '-' { +			start = -1 +		} else { +			start = parseInt(l) +		} +		switch l.next() { +			case ',': +				output = append(output, ConvexRange{start, start}) +				continue loop +			case '-': +				char := l.next() +				if charIsDigit(char) { +					l.rewind() +					end = parseInt(l) +				} else { +					l.rewind() +					end = -1 +				} +			case '}': +				output = append(output, ConvexRange{start, start}) +				break loop +			default: +				panic("Invalid character in repeat specifier") +		} +		switch l.next() { +			case ',': +				output = append(output, ConvexRange{start, end}) +				continue loop +			case '}': +				output = append(output, ConvexRange{start, end}) +				break loop +			default: +				panic("Invalid character in repeat specifier") +		} +	} +	return output +} +  func parseReplacement(l *RuneReader) (output []OutputContent) {  	// TODO escaping  	loop: for { @@ -144,7 +206,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  		case '[':  			rangeParts := parseRangeSubex(l)  			lhs = SubexASTRange {rangeParts} -		case ')', '*', '-', '|', '!', '?', ';': +		case ')', '*', '-', '|', '!', '?', ';', '{':  			l.rewind()  			return nil  		case '$': @@ -180,6 +242,11 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  		}  		r := l.next()  		switch { +			case r == '{' && minPower <= 8: +				lhs = SubexASTRepeat{ +					content: lhs, +					acceptable: parseRepeatRange(l), +				}  			case r == '*' && minPower <= 8:  				lhs = SubexASTMaximise{lhs}  			case r == '-' && minPower <= 8: @@ -203,6 +270,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {  					content: lhs,  					delimiter: rhs,  				} +			//case r == '+' && minPower <= 6: +			//	rhs := parseSubex(l, 7) +			//	if rhs == nil { +			//		panic("Missing subex after +") +			//	} +			//	// TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that +			//	lhs = SubexASTAdd{lhs, rhs}  			default:  				l.rewind()  				break loop diff --git a/subex/subexast.go b/subex/subexast.go index 0c5c676..650f038 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -80,22 +80,78 @@ func (ast SubexASTMinimise) String() string {  	return fmt.Sprintf("(%v)-", ast.content)  } -// Run the subex as many times as possible but at least min times and at most max times +type ConvexRange struct { +	start, end int +} +func (cr ConvexRange) minmax() (int, int) { +		if cr.start == -1 { +			return cr.end, -1 +		} else if cr.end == -1 { +			return cr.start, -1 +		} else if cr.start < cr.end { +			return cr.start, cr.end +		} else { +			return cr.end, cr.start +		} +} +func (cr ConvexRange) decrement() ConvexRange { +	if cr.start == -1 { +		return ConvexRange{-1, cr.end - 1} +	} else if cr.end == -1 { +		return ConvexRange{cr.start - 1, -1} +	} else { +		return ConvexRange{cr.start - 1, cr.end - 1} +	} +} +func (cr ConvexRange) compile(content SubexAST, next SubexState) SubexState { +	min, _ := cr.minmax() +	if min != 0 { +		return content.compileWith(cr.decrement().compile(content, next)) +	} +	if cr.start == -1 { +		state := &SubexGroupState {nil, next} +		state.first = content.compileWith(state) +		return state +	} +	if cr.end == -1 { +		state := &SubexGroupState {next, nil} +		state.second = content.compileWith(state) +		return state +	} + +	if cr.end == 0 { +		state := next; +		for i := 0; i < cr.start; i += 1 { +			state = &SubexGroupState { +				content.compileWith(state), +				next, +			} +		} +		return state +	} else { +		state := next; +		for i := 0; i < cr.end; i += 1 { +			state = &SubexGroupState { +				next, +				content.compileWith(state), +			} +		} +		return state +	} +} + +// Try to run the subex a number of times that is one of the numbers in the acceptable range +// Prioritising the left  type SubexASTRepeat struct {  	content SubexAST -	min, max int +	acceptable []ConvexRange  }  func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { -	for i := ast.min; i < ast.max; i += 1 { -		next = &SubexGroupState { -			ast.content.compileWith(next), -			next, -		} +	var state SubexState = &SubexDeadState{} +	for _, convex := range ast.acceptable { +		state = SubexGroupState {state, convex.compile(ast.content, next)}  	} -	for i := 0; i < ast.min; i += 1 { -		next = ast.content.compileWith(next) -	} -	return next +	return state  }  // Read in a single specific Atom and output it unchanged diff --git a/subex/subexstate.go b/subex/subexstate.go index 6318376..3c554a2 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -123,6 +123,15 @@ func (state SubexNoneState) accepting(store Store) [][]walk.Atom {  	return [][]walk.Atom{nil}  } +// A dead end state, handy for making internals work nicer but technically redundant +type SubexDeadState struct {} +func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch { +	return nil +} +func (state SubexDeadState) accepting (store Store) [][]walk.Atom { +	return nil +} +  // Read in a specific Atom and output it  type SubexCopyAtomState struct {  	atom walk.Atom | 
