<- Back to shtanton's homepage
about | summary | refs | log | tree | commit | diff
path: root/subex/parse.go
diff options
context:
space:
mode:
Diffstat (limited to 'subex/parse.go')
-rw-r--r-- subex/parse.go 175
1 files changed, 175 insertions, 0 deletions
diff --git a/subex/parse.go b/subex/parse.go
new file mode 100644
index 0000000..af575eb
--- /dev/null
+++ b/subex/parse.go
@@ -0,0 +1,175 @@
+package subex
+
+// parseReplacement reads the body of a double-quoted replacement, the opening
+// quote having already been consumed by the caller, and stops after consuming
+// the closing quote.
+//
+// A '$' followed by a rune becomes a TransducerReplacementLoad of that rune;
+// every other rune becomes a TransducerReplacementRune. The collected outputs
+// are returned in input order (nil if the replacement is empty).
+//
+// It panics if the input ends before the closing quote or directly after '$'.
+func parseReplacement(l *RuneReader) (output []TransducerOutput) {
+	for {
+		c := l.next()
+		if c == '"' {
+			// Closing quote: the replacement is complete.
+			return output
+		}
+		if c == eof {
+			panic("Missing closing \"")
+		}
+		if c != '$' {
+			// Plain rune, emitted literally.
+			output = append(output, TransducerReplacementRune(c))
+			continue
+		}
+		// '$' introduces a slot load; the very next rune names the slot.
+		name := l.next()
+		if name == eof {
+			panic("Missing slot character")
+		}
+		output = append(output, TransducerReplacementLoad(name))
+	}
+}
+
+// parseRangeSubex parses the inside of a range expression, the opening '['
+// having already been consumed by the caller, up to and including the
+// closing ']'.
+//
+// The accepted forms are [froms] and [froms=tos]. Each side is a sequence of
+// single runes and inclusive start-end ranges. A '-' directly before the
+// terminator is dropped and the preceding rune is taken on its own.
+//
+// The returned map sends every "from" rune to a "to" rune: with no '=' each
+// rune maps to itself; otherwise the i-th from maps to the i-th to, wrapping
+// around tos when it is shorter than froms.
+//
+// It panics if the from part or the to part is empty, or if the input ends
+// before the closing ']'.
+func parseRangeSubex(l *RuneReader) map[rune]rune {
+	var froms []rune
+	var hasTo bool
+	for {
+		fromsStart := l.next()
+		if fromsStart == eof {
+			// The loop has no other exit at end of input, so an
+			// unterminated range must be rejected here.
+			panic("Missing closing ]")
+		}
+		if fromsStart == ']' {
+			hasTo = false
+			break
+		} else if fromsStart == '=' {
+			hasTo = true
+			break
+		}
+		var fromsEnd rune
+		if l.accept("-") {
+			fromsEnd = l.next()
+			if fromsEnd == eof {
+				panic("Missing closing ]")
+			}
+			if fromsEnd == ']' || fromsEnd == '=' {
+				// The '-' was not followed by a range end: put the
+				// terminator back and treat the start as a single rune.
+				l.rewind()
+				fromsEnd = fromsStart
+			}
+		} else {
+			fromsEnd = fromsStart
+		}
+		for i := fromsStart; i <= fromsEnd; i++ {
+			froms = append(froms, i)
+		}
+	}
+	if len(froms) == 0 {
+		panic("Missing from part of range expression")
+	}
+
+	var tos []rune
+	if hasTo {
+		for {
+			tosStart := l.next()
+			if tosStart == eof {
+				// Same guard as above for the to part.
+				panic("Missing closing ]")
+			}
+			if tosStart == ']' {
+				break
+			}
+			var tosEnd rune
+			if l.accept("-") {
+				tosEnd = l.next()
+				if tosEnd == eof {
+					panic("Missing closing ]")
+				}
+				if tosEnd == ']' {
+					l.rewind()
+					tosEnd = tosStart
+				}
+			} else {
+				tosEnd = tosStart
+			}
+			for i := tosStart; i <= tosEnd; i++ {
+				tos = append(tos, i)
+			}
+		}
+	} else {
+		// No '=': every rune maps to itself.
+		tos = froms
+	}
+	if len(tos) == 0 {
+		panic("Missing to part of range expression")
+	}
+
+	parts := make(map[rune]rune, len(froms))
+	for i, from := range froms {
+		// Wrap around tos so a short to part is reused cyclically.
+		parts[from] = tos[i%len(tos)]
+	}
+	return parts
+}
+
+// parseSubex parses one subex from l using precedence climbing and returns
+// its AST.
+//
+// minPower is the minimum binding level an operator must have to be consumed
+// by this call:
+//   - concatenation is only attempted when minPower <= 0;
+//   - the postfix operators '*', '-', '!' and '?' have level 8;
+//   - '|' has level 4 and parses its right-hand side with minPower 5;
+//   - ';' has level 2 and parses its right-hand side with minPower 3.
+//
+// It returns nil without building anything when the input is exhausted, or
+// when the next rune is one of ")*-|!?;" (that rune is put back first).
+//
+// It panics on a missing ")", a missing slot character or operand after '$',
+// and a missing right-hand side after '|' or ';'.
+func parseSubex(l *RuneReader, minPower int) SubexAST {
+	var node SubexAST
+	first := l.next()
+	switch first {
+	case eof:
+		return nil
+	case ')', '*', '-', '|', '!', '?', ';':
+		// Not the start of a subex; leave it for the caller.
+		l.rewind()
+		return nil
+	case '.':
+		node = SubexASTCopyAny{}
+	case '"':
+		node = SubexASTOutput{parseReplacement(l)}
+	case '[':
+		node = SubexASTRange{parseRangeSubex(l)}
+	case '(':
+		// NOTE(review): for "()" the inner call returns nil, so node stays
+		// nil here - confirm that later stages tolerate that.
+		node = parseSubex(l, 0)
+		if !l.accept(")") {
+			panic("Missing matching )")
+		}
+	case '$':
+		name := l.next()
+		if name == eof {
+			panic("Missing slot character")
+		}
+		// minPower 100 is above every operator level, so the stored subex
+		// is a single primary with no operators attached.
+		stored := parseSubex(l, 100)
+		if stored == nil {
+			panic("Missing regex for store")
+		}
+		node = SubexASTStore{
+			match: stored,
+			slot:  name,
+		}
+	default:
+		node = SubexASTCopyRune(first)
+	}
+
+	for {
+		if minPower <= 0 {
+			// Concatenation: keep appending while another subex follows.
+			if following := parseSubex(l, 1); following != nil {
+				node = SubexASTConcat{node, following}
+				continue
+			}
+		}
+
+		op := l.next()
+
+		// Look up the operator's binding level; anything else ends this subex.
+		var power int
+		switch op {
+		case '*', '-', '!', '?':
+			power = 8
+		case '|':
+			power = 4
+		case ';':
+			power = 2
+		default:
+			l.rewind()
+			return node
+		}
+		if minPower > power {
+			// Binds too loosely for this call; leave it for an outer one.
+			l.rewind()
+			return node
+		}
+
+		switch op {
+		case '*':
+			node = SubexASTMaximise{node}
+		case '-':
+			node = SubexASTMinimise{node}
+		case '!':
+			node = SubexASTTry{node}
+		case '?':
+			node = SubexASTMaybe{node}
+		case '|':
+			alternative := parseSubex(l, 5)
+			if alternative == nil {
+				panic("Missing subex after |")
+			}
+			node = SubexASTOr{node, alternative}
+		case ';':
+			separator := parseSubex(l, 3)
+			if separator == nil {
+				panic("Missing subex after ;")
+			}
+			node = SubexASTJoin{
+				content:   node,
+				delimiter: separator,
+			}
+		}
+	}
+}
+
+// Parse parses input as a subex and returns its AST.
+//
+// It runs parseSubex with minPower 0 over a fresh RuneReader whose position
+// and width start at zero. The result is nil when parseSubex finds nothing to
+// parse, and the call panics when parseSubex rejects the input.
+func Parse(input string) SubexAST {
+	// pos and width are left at their zero values.
+	reader := &RuneReader{input: input}
+	return parseSubex(reader, 0)
+}