From 8e9f0b186745afd51579d2a6136a57705efc7574 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Tue, 18 Apr 2023 12:47:55 +0100 Subject: Adds the repeat construct, obsoleting maximise, minimise, try, maybe and probably more The repeat construct repeats a subex a number of times, this number is based on a provided list which is ordered by priority and can be unbounded. --- subex/parse.go | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++- subex/subexast.go | 78 +++++++++++++++++++++++++++++++++++++++++++++-------- subex/subexstate.go | 9 +++++++ 3 files changed, 151 insertions(+), 12 deletions(-) diff --git a/subex/parse.go b/subex/parse.go index f2c77bc..24ae082 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -29,6 +29,68 @@ func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { } } +func charIsDigit(c rune) bool { + return '0' <= c && c <= '9' +} + +// Parse a positive integer, reads digits 0-9 and stops at the first non-digit +func parseInt(l *RuneReader) (output int) { + for { + char := l.next() + if charIsDigit(char) { + output = output * 10 + int(char - '0') + } else { + break + } + } + l.rewind() + return output +} + +// Having just read {, read in and parse the range contents +func parseRepeatRange(l *RuneReader) (output []ConvexRange) { + loop: for { + var start, end int + char := l.next() + l.rewind() + if char == '-' { + start = -1 + } else { + start = parseInt(l) + } + switch l.next() { + case ',': + output = append(output, ConvexRange{start, start}) + continue loop + case '-': + char := l.next() + if charIsDigit(char) { + l.rewind() + end = parseInt(l) + } else { + l.rewind() + end = -1 + } + case '}': + output = append(output, ConvexRange{start, start}) + break loop + default: + panic("Invalid character in repeat specifier") + } + switch l.next() { + case ',': + output = append(output, ConvexRange{start, end}) + continue loop + case '}': + output = append(output, ConvexRange{start, end}) + break loop + default: + panic("Invalid character in repeat specifier") + } + } + return output +} + func parseReplacement(l *RuneReader) (output []OutputContent) { // TODO escaping loop: for { @@ -144,7 +206,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} - case ')', '*', '-', '|', '!', '?', ';': + case ')', '*', '-', '|', '!', '?', ';', '{': l.rewind() return nil case '$': @@ -180,6 +242,11 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { } r := l.next() switch { + case r == '{' && minPower <= 8: + lhs = SubexASTRepeat{ + content: lhs, + acceptable: parseRepeatRange(l), + } case r == '*' && minPower <= 8: lhs = SubexASTMaximise{lhs} case r == '-' && minPower <= 8: @@ -203,6 +270,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { content: lhs, delimiter: rhs, } + //case r == '+' && minPower <= 6: + // rhs := parseSubex(l, 7) + // if rhs == nil { + // panic("Missing subex after +") + // } + // // TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that + // lhs = SubexASTAdd{lhs, rhs} default: l.rewind() break loop diff --git a/subex/subexast.go b/subex/subexast.go index 0c5c676..650f038 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -80,22 +80,78 @@ func (ast SubexASTMinimise) String() string { return fmt.Sprintf("(%v)-", ast.content) } -// Run the subex as many times as possible but at least min times and at most max times +type ConvexRange struct { + start, end int +} +func (cr ConvexRange) minmax() (int, int) { + if cr.start == -1 { + return cr.end, -1 + } else if cr.end == -1 { + return cr.start, -1 + } else if cr.start < cr.end { + return cr.start, cr.end + } else { + return cr.end, cr.start + } +} +func (cr ConvexRange) decrement() ConvexRange { + if cr.start == -1 { + return ConvexRange{-1, cr.end - 1} + } else if cr.end == -1 { + return ConvexRange{cr.start - 1, -1} + } else { + return ConvexRange{cr.start - 1, cr.end - 1} + } +} +func (cr ConvexRange) compile(content SubexAST, next SubexState) SubexState { + min, _ := cr.minmax() + if min != 0 { + return content.compileWith(cr.decrement().compile(content, next)) + } + if cr.start == -1 { + state := &SubexGroupState {nil, next} + state.first = content.compileWith(state) + return state + } + if cr.end == -1 { + state := &SubexGroupState {next, nil} + state.second = content.compileWith(state) + return state + } + + if cr.end == 0 { + state := next; + for i := 0; i < cr.start; i += 1 { + state = &SubexGroupState { + content.compileWith(state), + next, + } + } + return state + } else { + state := next; + for i := 0; i < cr.end; i += 1 { + state = &SubexGroupState { + next, + content.compileWith(state), + } + } + return state + } +} + +// Try to run the subex a number of times that is one of the numbers in the acceptable range +// Prioritising the left type SubexASTRepeat struct { content SubexAST - min, max int + acceptable []ConvexRange } func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { - for i := ast.min; i < ast.max; i += 1 { - next = &SubexGroupState { - ast.content.compileWith(next), - next, - } + var state SubexState = &SubexDeadState{} + for _, convex := range ast.acceptable { + state = SubexGroupState {state, convex.compile(ast.content, next)} } - for i := 0; i < ast.min; i += 1 { - next = ast.content.compileWith(next) - } - return next + return state } // Read in a single specific Atom and output it unchanged diff --git a/subex/subexstate.go b/subex/subexstate.go index 6318376..3c554a2 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -123,6 +123,15 @@ func (state SubexNoneState) accepting(store Store) [][]walk.Atom { return [][]walk.Atom{nil} } +// A dead end state, handy for making internals work nicer but technically redundant +type SubexDeadState struct {} +func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch { + return nil +} +func (state SubexDeadState) accepting (store Store) [][]walk.Atom { + return nil +} + // Read in a specific Atom and output it type SubexCopyAtomState struct { atom walk.Atom -- cgit v1.2.3