From 10f847acc7087317b0fbe20b7cf3307a0fafab8a Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 19 Apr 2023 14:34:22 +0100 Subject: Changes the parsing API for subex to be more suitable to being part of a larger program --- subex/parse.go | 91 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 40 deletions(-) (limited to 'subex/parse.go') diff --git a/subex/parse.go b/subex/parse.go index d6ef995..e6efc2e 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -4,8 +4,24 @@ import ( "main/walk" ) -func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { - switch l.next() { +type RuneReader interface { + Next() rune + Rewind() +} + +func accept(l RuneReader, chars string) bool { + r := l.Next() + for _, char := range chars { + if char == r { + return true + } + } + l.Rewind() + return false +} + +func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { + switch l.Next() { case '(': return ifLeft case ')': @@ -16,7 +32,7 @@ func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom } // Having just read termType, read in a bracket and return the corresponding Atom -func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { +func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom { switch termType { case '@': return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) @@ -34,41 +50,41 @@ func charIsDigit(c rune) bool { } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit -func parseInt(l *RuneReader) (output int) { +func parseInt(l RuneReader) (output int) { for { - char := l.next() + char := l.Next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } - l.rewind() + l.Rewind() return output } // Having just read {, read in and parse the range contents -func parseRepeatRange(l *RuneReader) (output []ConvexRange) { +func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { var start, end int - char := l.next() - l.rewind() + char := l.Next() + l.Rewind() if char == '-' { start = -1 } else { start = parseInt(l) } - switch l.next() { + switch l.Next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': - char := l.next() + char := l.Next() if charIsDigit(char) { - l.rewind() + l.Rewind() end = parseInt(l) } else { - l.rewind() + l.Rewind() end = -1 } case '}': @@ -77,7 +93,7 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) { default: panic("Invalid character in repeat specifier") } - switch l.next() { + switch l.Next() { case ',': output = append(output, ConvexRange{start, end}) continue loop @@ -91,17 +107,17 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) { return output } -func parseReplacement(l *RuneReader) (output []OutputContent) { +func parseReplacement(l RuneReader) (output []OutputContent) { // TODO escaping loop: for { - r := l.next() + r := l.Next() switch r { case eof: panic("Missing closing \"") case '"': break loop case '$': - slot := l.next() + slot := l.Next() if slot == eof { panic("Missing slot character") } @@ -116,13 +132,13 @@ func parseReplacement(l *RuneReader) (output []OutputContent) { } // Parse the contents of a range subex [] into a map -func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { +func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom { // TODO escaping parts := make(map[walk.Atom]walk.Atom) var froms []walk.Atom var hasTo bool for { - fromsStart := l.next() + fromsStart := l.Next() if fromsStart == ']' { hasTo = false break @@ -136,10 +152,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { continue } } - if l.accept("-") { - fromsEnd := l.next() + if accept(l, "-") { + fromsEnd := l.Next() if fromsEnd == ']' || fromsEnd == '=' { - l.rewind() + l.Rewind() fromsEnd = fromsStart } for i := fromsStart; i <= fromsEnd; i += 1 { @@ -156,7 +172,7 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { var tos []walk.Atom if hasTo { for { - tosStart := l.next() + tosStart := l.Next() if tosStart == ']' { break } else { @@ -166,10 +182,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { continue } } - if l.accept("-") { - tosEnd := l.next() + if accept(l, "-") { + tosEnd := l.Next() if tosEnd == ']' { - l.rewind() + l.Rewind() tosEnd = tosStart } for i := tosStart; i <= tosEnd; i += 1 { @@ -192,22 +208,22 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { return parts } -func parseSubex(l *RuneReader, minPower int) SubexAST { +func parseSubex(l RuneReader, minPower int) SubexAST { var lhs SubexAST - r := l.next() + r := l.Next() switch r { case eof: return nil case '(': lhs = parseSubex(l, 0) - if !l.accept(")") { + if !accept(l, ")") { panic("Missing matching )") } case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} case ')', '|', ';', '{', '+', '$': - l.rewind() + l.Rewind() return nil case '"': replacement := parseReplacement(l) @@ -227,7 +243,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { continue loop } } - r := l.next() + r := l.Next() switch { case r == '{' && minPower <= 8: lhs = SubexASTRepeat { @@ -245,7 +261,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case r == '!' && minPower <= 8: lhs = SubexASTNot {lhs} case r == '$' && minPower <= 8: - slot := l.next() + slot := l.Next() if slot == eof { panic("Missing slot character") } @@ -269,18 +285,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { delimiter: rhs, } default: - l.rewind() + l.Rewind() break loop } } return lhs } -func Parse(input string) SubexAST { - l := RuneReader { - input: input, - pos: 0, - width: 0, - } - return parseSubex(&l, 0) +func Parse(l RuneReader) SubexAST { + return parseSubex(l, 0) } -- cgit v1.2.3