<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Stanton <charlie@shtanton.xyz>2023-04-18 12:47:55 +0100
committerCharlie Stanton <charlie@shtanton.xyz>2023-04-18 12:47:55 +0100
commit8e9f0b186745afd51579d2a6136a57705efc7574 (patch)
tree7ae5fa77518e2e9f4f4f6b87470ae314506daf0b
parentebd4f72fb2d1968ac9c13e5b5e743d265a9df6ca (diff)
downloadstred-go-8e9f0b186745afd51579d2a6136a57705efc7574.tar
Adds the repeat construct, obsoleting maximise, minimise, try, maybe and probably more
The repeat construct repeats a subex a number of times, this number is based on a provided list which is ordered by priority and can be unbounded.
-rw-r--r--subex/parse.go76
-rw-r--r--subex/subexast.go78
-rw-r--r--subex/subexstate.go9
3 files changed, 151 insertions, 12 deletions
diff --git a/subex/parse.go b/subex/parse.go
index f2c77bc..24ae082 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -29,6 +29,68 @@ func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {
}
}
+func charIsDigit(c rune) bool {
+ return '0' <= c && c <= '9'
+}
+
+// Parse a positive integer, reads digits 0-9 and stops at the first non-digit
+func parseInt(l *RuneReader) (output int) {
+ for {
+ char := l.next()
+ if charIsDigit(char) {
+ output = output * 10 + int(char - '0')
+ } else {
+ break
+ }
+ }
+ l.rewind()
+ return output
+}
+
+// Having just read {, read in and parse the range contents
+func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
+ loop: for {
+ var start, end int
+ char := l.next()
+ l.rewind()
+ if char == '-' {
+ start = -1
+ } else {
+ start = parseInt(l)
+ }
+ switch l.next() {
+ case ',':
+ output = append(output, ConvexRange{start, start})
+ continue loop
+ case '-':
+ char := l.next()
+ if charIsDigit(char) {
+ l.rewind()
+ end = parseInt(l)
+ } else {
+ l.rewind()
+ end = -1
+ }
+ case '}':
+ output = append(output, ConvexRange{start, start})
+ break loop
+ default:
+ panic("Invalid character in repeat specifier")
+ }
+ switch l.next() {
+ case ',':
+ output = append(output, ConvexRange{start, end})
+ continue loop
+ case '}':
+ output = append(output, ConvexRange{start, end})
+ break loop
+ default:
+ panic("Invalid character in repeat specifier")
+ }
+ }
+ return output
+}
+
func parseReplacement(l *RuneReader) (output []OutputContent) {
// TODO escaping
loop: for {
@@ -144,7 +206,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
case '[':
rangeParts := parseRangeSubex(l)
lhs = SubexASTRange {rangeParts}
- case ')', '*', '-', '|', '!', '?', ';':
+ case ')', '*', '-', '|', '!', '?', ';', '{':
l.rewind()
return nil
case '$':
@@ -180,6 +242,11 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
}
r := l.next()
switch {
+ case r == '{' && minPower <= 8:
+ lhs = SubexASTRepeat{
+ content: lhs,
+ acceptable: parseRepeatRange(l),
+ }
case r == '*' && minPower <= 8:
lhs = SubexASTMaximise{lhs}
case r == '-' && minPower <= 8:
@@ -203,6 +270,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
content: lhs,
delimiter: rhs,
}
+ //case r == '+' && minPower <= 6:
+ // rhs := parseSubex(l, 7)
+ // if rhs == nil {
+ // panic("Missing subex after +")
+ // }
+ // // TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that
+ // lhs = SubexASTAdd{lhs, rhs}
default:
l.rewind()
break loop
diff --git a/subex/subexast.go b/subex/subexast.go
index 0c5c676..650f038 100644
--- a/subex/subexast.go
+++ b/subex/subexast.go
@@ -80,22 +80,78 @@ func (ast SubexASTMinimise) String() string {
return fmt.Sprintf("(%v)-", ast.content)
}
-// Run the subex as many times as possible but at least min times and at most max times
+type ConvexRange struct {
+ start, end int
+}
+func (cr ConvexRange) minmax() (int, int) {
+ if cr.start == -1 {
+ return cr.end, -1
+ } else if cr.end == -1 {
+ return cr.start, -1
+ } else if cr.start < cr.end {
+ return cr.start, cr.end
+ } else {
+ return cr.end, cr.start
+ }
+}
+func (cr ConvexRange) decrement() ConvexRange {
+ if cr.start == -1 {
+ return ConvexRange{-1, cr.end - 1}
+ } else if cr.end == -1 {
+ return ConvexRange{cr.start - 1, -1}
+ } else {
+ return ConvexRange{cr.start - 1, cr.end - 1}
+ }
+}
+func (cr ConvexRange) compile(content SubexAST, next SubexState) SubexState {
+ min, _ := cr.minmax()
+ if min != 0 {
+ return content.compileWith(cr.decrement().compile(content, next))
+ }
+ if cr.start == -1 {
+ state := &SubexGroupState {nil, next}
+ state.first = content.compileWith(state)
+ return state
+ }
+ if cr.end == -1 {
+ state := &SubexGroupState {next, nil}
+ state.second = content.compileWith(state)
+ return state
+ }
+
+ if cr.end == 0 {
+ state := next;
+ for i := 0; i < cr.start; i += 1 {
+ state = &SubexGroupState {
+ content.compileWith(state),
+ next,
+ }
+ }
+ return state
+ } else {
+ state := next;
+ for i := 0; i < cr.end; i += 1 {
+ state = &SubexGroupState {
+ next,
+ content.compileWith(state),
+ }
+ }
+ return state
+ }
+}
+
+// Try to run the subex a number of times that is one of the numbers in the acceptable range
+// Prioritising the left
type SubexASTRepeat struct {
content SubexAST
- min, max int
+ acceptable []ConvexRange
}
func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
- for i := ast.min; i < ast.max; i += 1 {
- next = &SubexGroupState {
- ast.content.compileWith(next),
- next,
- }
+ var state SubexState = &SubexDeadState{}
+ for _, convex := range ast.acceptable {
+ state = SubexGroupState {state, convex.compile(ast.content, next)}
}
- for i := 0; i < ast.min; i += 1 {
- next = ast.content.compileWith(next)
- }
- return next
+ return state
}
// Read in a single specific Atom and output it unchanged
diff --git a/subex/subexstate.go b/subex/subexstate.go
index 6318376..3c554a2 100644
--- a/subex/subexstate.go
+++ b/subex/subexstate.go
@@ -123,6 +123,15 @@ func (state SubexNoneState) accepting(store Store) [][]walk.Atom {
return [][]walk.Atom{nil}
}
+// A dead end state, handy for making internals work nicer but technically redundant
+type SubexDeadState struct {}
+func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch {
+ return nil
+}
+func (state SubexDeadState) accepting (store Store) [][]walk.Atom {
+ return nil
+}
+
// Read in a specific Atom and output it
type SubexCopyAtomState struct {
atom walk.Atom