package subex import ( "main/walk" ) func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { switch l.next() { case '(': return ifLeft case ')': return ifRight default: panic("Expected ( or )") } } // Having just read termType, read in a bracket and return the corresponding Atom func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { switch termType { case '@': return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) case '~': return expectBracket(l, walk.StartString{}, walk.EndString{}) case '#': return expectBracket(l, walk.MapBegin, walk.MapEnd) default: return nil } } func charIsDigit(c rune) bool { return '0' <= c && c <= '9' } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit func parseInt(l *RuneReader) (output int) { for { char := l.next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } l.rewind() return output } // Having just read {, read in and parse the range contents func parseRepeatRange(l *RuneReader) (output []ConvexRange) { loop: for { var start, end int char := l.next() l.rewind() if char == '-' { start = -1 } else { start = parseInt(l) } switch l.next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': char := l.next() if charIsDigit(char) { l.rewind() end = parseInt(l) } else { l.rewind() end = -1 } case '}': output = append(output, ConvexRange{start, start}) break loop default: panic("Invalid character in repeat specifier") } switch l.next() { case ',': output = append(output, ConvexRange{start, end}) continue loop case '}': output = append(output, ConvexRange{start, end}) break loop default: panic("Invalid character in repeat specifier") } } return output } func parseReplacement(l *RuneReader) (output []OutputContent) { // TODO escaping loop: for { r := l.next() switch r { case eof: panic("Missing closing \"") case '"': break loop case '$': slot := l.next() if slot == eof { panic("Missing slot character") } output = append(output, OutputLoad{slot: slot}) case '@', '~', '#': output = append(output, OutputAtomLiteral{atom: parseTerminatorAtomLiteral(r, l)}) default: output = append(output, OutputAtomLiteral{atom: r}) } } return output } // Parse the contents of a range subex [] into a map func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { // TODO escaping parts := make(map[walk.Atom]walk.Atom) var froms []walk.Atom var hasTo bool for { fromsStart := l.next() if fromsStart == ']' { hasTo = false break } else if fromsStart == '=' { hasTo = true break } else { atom := parseTerminatorAtomLiteral(fromsStart, l) if atom != nil { froms = append(froms, atom) continue } } if l.accept("-") { fromsEnd := l.next() if fromsEnd == ']' || fromsEnd == '=' { l.rewind() fromsEnd = fromsStart } for i := fromsStart; i <= fromsEnd; i += 1 { froms = append(froms, i) } } else { froms = append(froms, fromsStart) } } if len(froms) == 0 { panic("Missing from part of range expression") } var tos []walk.Atom if hasTo { for { tosStart := l.next() if tosStart == ']' { break } else { atom := parseTerminatorAtomLiteral(tosStart, l) if atom != nil { tos = append(tos, atom) continue } } if l.accept("-") { tosEnd := l.next() if tosEnd == ']' { l.rewind() tosEnd = tosStart } for i := tosStart; i <= tosEnd; i += 1 { tos = append(tos, i) } } else { tos = append(tos, tosStart) } } } else { tos = froms } if len(tos) == 0 { panic("Missing to part of range expression") } for i, from := range froms { parts[from] = tos[i % len(tos)] } return parts } func parseSubex(l *RuneReader, minPower int) SubexAST { var lhs SubexAST r := l.next() switch r { case eof: return nil case '(': lhs = parseSubex(l, 0) if !l.accept(")") { panic("Missing matching )") } case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} case ')', '*', '-', '|', '!', '?', ';', '{': l.rewind() return nil case '$': slot := l.next() if slot == eof { panic("Missing slot character") } match := parseSubex(l, 100) if match == nil { panic("Missing regex for store") } lhs = SubexASTStore{ match: match, slot: slot, } case '"': replacement := parseReplacement(l) lhs = SubexASTOutput{replacement} case '.': lhs = SubexASTCopyAny{} case '@', '#', '~': lhs = SubexASTCopyAtom{atom: parseTerminatorAtomLiteral(r, l)} default: lhs = SubexASTCopyAtom{atom: r} } loop: for { if minPower <= 0 { next := parseSubex(l, 1) if next != nil { lhs = SubexASTConcat{lhs, next} continue loop } } r := l.next() switch { case r == '{' && minPower <= 8: lhs = SubexASTRepeat{ content: lhs, acceptable: parseRepeatRange(l), } case r == '*' && minPower <= 8: lhs = SubexASTMaximise{lhs} case r == '-' && minPower <= 8: lhs = SubexASTMinimise{lhs} case r == '!' && minPower <= 8: lhs = SubexASTTry{lhs} case r == '?' && minPower <= 8: lhs = SubexASTMaybe{lhs} case r == '|' && minPower <= 4: rhs := parseSubex(l, 5) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} case r == ';' && minPower <= 2: rhs := parseSubex(l, 3) if rhs == nil { panic("Missing subex after ;") } lhs = SubexASTJoin{ content: lhs, delimiter: rhs, } //case r == '+' && minPower <= 6: // rhs := parseSubex(l, 7) // if rhs == nil { // panic("Missing subex after +") // } // // TODO: Implement this. Runs subex on the left, then subex on the right, then sums the outputs of each and outputs that // lhs = SubexASTAdd{lhs, rhs} default: l.rewind() break loop } } return lhs } func Parse(input string) SubexAST { l := RuneReader { input: input, pos: 0, width: 0, } return parseSubex(&l, 0) }