From 8e9f0b186745afd51579d2a6136a57705efc7574 Mon Sep 17 00:00:00 2001
From: Charlie Stanton <charlie@shtanton.xyz>
Date: Tue, 18 Apr 2023 12:47:55 +0100
Subject: Adds the repeat construct, obsoleting maximise, minimise, try, maybe
 and probably more

The repeat construct repeats a subex a number of times, this number is based on a provided list which is ordered by priority and can be unbounded.
---
 subex/parse.go      | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 subex/subexast.go   | 78 +++++++++++++++++++++++++++++++++++++++++++++--------
 subex/subexstate.go |  9 +++++++
 3 files changed, 151 insertions(+), 12 deletions(-)

diff --git a/subex/parse.go b/subex/parse.go
index f2c77bc..24ae082 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -29,6 +29,68 @@ func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {
 	}
 }
 
+func charIsDigit(c rune) bool {
+	return '0' <= c && c <= '9'
+}
+
+// Parse a positive integer, reads digits 0-9 and stops at the first non-digit
+func parseInt(l *RuneReader) (output int) {
+	for {
+		char := l.next()
+		if charIsDigit(char) {
+			output = output * 10 + int(char - '0')
+		} else {
+			break
+		}
+	}
+	l.rewind()
+	return output
+}
+
+// Having just read {, read in and parse the range contents
+func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
+	loop: for {
+		var start, end int
+		char := l.next()
+		l.rewind()
+		if char == '-' {
+			start = -1
+		} else {
+			start = parseInt(l)
+		}
+		switch l.next() {
+			case ',':
+				output = append(output, ConvexRange{start, start})
+				continue loop
+			case '-':
+				char := l.next()
+				if charIsDigit(char) {
+					l.rewind()
+					end = parseInt(l)
+				} else {
+					l.rewind()
+					end = -1
+				}
+			case '}':
+				output = append(output, ConvexRange{start, start})
+				break loop
+			default:
+				panic("Invalid character in repeat specifier")
+		}
+		switch l.next() {
+			case ',':
+				output = append(output, ConvexRange{start, end})
+				continue loop
+			case '}':
+				output = append(output, ConvexRange{start, end})
+				break loop
+			default:
+				panic("Invalid character in repeat specifier")
+		}
+	}
+	return output
+}
+
 func parseReplacement(l *RuneReader) (output []OutputContent) {
 	// TODO escaping
 	loop: for {
@@ -144,7 +206,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 		case '[':
 			rangeParts := parseRangeSubex(l)
 			lhs = SubexASTRange {rangeParts}
-		case ')', '*', '-', '|', '!', '?', ';':
+		case ')', '*', '-', '|', '!', '?', ';', '{':
 			l.rewind()
 			return nil
 		case '$':
@@ -180,6 +242,11 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 		}
 		r := l.next()
 		switch {
+			case r == '{' && minPower <= 8:
+				lhs = SubexASTRepeat{
+					content: lhs,
+					acceptable: parseRepeatRange(l),
+				}
 			case r == '*' && minPower <= 8:
 				lhs = SubexASTMaximise{lhs}
 			case r == '-' && minPower <= 8:
@@ -203,6 +270,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 					content: lhs,
 					delimiter: rhs,
 				}
+			//case r == '+' && minPower <= 6:
+			//	rhs := parseSubex(l, 7)
+			//	if rhs == nil {
+			//		panic("Missing subex after +")
+			//	}
+			//	// TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that
+			//	lhs = SubexASTAdd{lhs, rhs}
 			default:
 				l.rewind()
 				break loop
diff --git a/subex/subexast.go b/subex/subexast.go
index 0c5c676..650f038 100644
--- a/subex/subexast.go
+++ b/subex/subexast.go
@@ -80,22 +80,78 @@ func (ast SubexASTMinimise) String() string {
 	return fmt.Sprintf("(%v)-", ast.content)
 }
 
-// Run the subex as many times as possible but at least min times and at most max times
+type ConvexRange struct {
+	start, end int
+}
+func (cr ConvexRange) minmax() (int, int) {
+		if cr.start == -1 {
+			return cr.end, -1
+		} else if cr.end == -1 {
+			return cr.start, -1
+		} else if cr.start < cr.end {
+			return cr.start, cr.end
+		} else {
+			return cr.end, cr.start
+		}
+}
+func (cr ConvexRange) decrement() ConvexRange {
+	if cr.start == -1 {
+		return ConvexRange{-1, cr.end - 1}
+	} else if cr.end == -1 {
+		return ConvexRange{cr.start - 1, -1}
+	} else {
+		return ConvexRange{cr.start - 1, cr.end - 1}
+	}
+}
+func (cr ConvexRange) compile(content SubexAST, next SubexState) SubexState {
+	min, _ := cr.minmax()
+	if min != 0 {
+		return content.compileWith(cr.decrement().compile(content, next))
+	}
+	if cr.start == -1 {
+		state := &SubexGroupState {nil, next}
+		state.first = content.compileWith(state)
+		return state
+	}
+	if cr.end == -1 {
+		state := &SubexGroupState {next, nil}
+		state.second = content.compileWith(state)
+		return state
+	}
+
+	if cr.end == 0 {
+		state := next;
+		for i := 0; i < cr.start; i += 1 {
+			state = &SubexGroupState {
+				content.compileWith(state),
+				next,
+			}
+		}
+		return state
+	} else {
+		state := next;
+		for i := 0; i < cr.end; i += 1 {
+			state = &SubexGroupState {
+				next,
+				content.compileWith(state),
+			}
+		}
+		return state
+	}
+}
+
+// Try to run the subex a number of times that is one of the numbers in the acceptable range
+// Prioritising the left
 type SubexASTRepeat struct {
 	content SubexAST
-	min, max int
+	acceptable []ConvexRange
 }
 func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
-	for i := ast.min; i < ast.max; i += 1 {
-		next = &SubexGroupState {
-			ast.content.compileWith(next),
-			next,
-		}
+	var state SubexState = &SubexDeadState{}
+	for _, convex := range ast.acceptable {
+		state = SubexGroupState {state, convex.compile(ast.content, next)}
 	}
-	for i := 0; i < ast.min; i += 1 {
-		next = ast.content.compileWith(next)
-	}
-	return next
+	return state
 }
 
 // Read in a single specific Atom and output it unchanged
diff --git a/subex/subexstate.go b/subex/subexstate.go
index 6318376..3c554a2 100644
--- a/subex/subexstate.go
+++ b/subex/subexstate.go
@@ -123,6 +123,15 @@ func (state SubexNoneState) accepting(store Store) [][]walk.Atom {
 	return [][]walk.Atom{nil}
 }
 
+// A dead end state, handy for making internals work nicer but technically redundant
+type SubexDeadState struct {}
+func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch {
+	return nil
+}
+func (state SubexDeadState) accepting (store Store) [][]walk.Atom {
+	return nil
+}
+
 // Read in a specific Atom and output it
 type SubexCopyAtomState struct {
 	atom walk.Atom
-- 
cgit v1.2.3