package subex import ( "main/walk" ) type RuneReader interface { Next() rune Rewind() } func accept(l RuneReader, chars string) bool { r := l.Next() for _, char := range chars { if char == r { return true } } l.Rewind() return false } func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { switch l.Next() { case '(': return ifLeft case ')': return ifRight default: panic("Expected ( or )") } } // Having just read termType, read in a bracket and return the corresponding Atom func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom { switch termType { case '@': return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) case '~': return expectBracket(l, walk.StartString{}, walk.EndString{}) case '#': return expectBracket(l, walk.MapBegin, walk.MapEnd) default: return nil } } func charIsDigit(c rune) bool { return '0' <= c && c <= '9' } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit func parseInt(l RuneReader) (output int) { for { char := l.Next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } l.Rewind() return output } // Having just read {, read in and parse the range contents func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { var start, end int char := l.Next() l.Rewind() if char == '-' { start = -1 } else { start = parseInt(l) } switch l.Next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': char := l.Next() if charIsDigit(char) { l.Rewind() end = parseInt(l) } else { l.Rewind() end = -1 } case '}': output = append(output, ConvexRange{start, start}) break loop default: panic("Invalid character in repeat specifier") } switch l.Next() { case ',': output = append(output, ConvexRange{start, end}) continue loop case '}': output = append(output, ConvexRange{start, end}) break loop default: panic("Invalid character in repeat specifier") } } return output } func parseReplacement(l RuneReader) (output []OutputContent) { // TODO escaping loop: for { r := l.Next() switch r { case eof: panic("Missing closing \"") case '"': break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } output = append(output, OutputLoad{slot: slot}) case '@', '~', '#': output = append(output, OutputAtomLiteral{atom: parseTerminatorAtomLiteral(r, l)}) default: output = append(output, OutputAtomLiteral{atom: walk.StringAtom(r)}) } } return output } // Parse the contents of a range subex [] into a map func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom { // TODO escaping parts := make(map[walk.Atom]walk.Atom) var froms []walk.Atom var hasTo bool for { fromsStart := l.Next() if fromsStart == ']' { hasTo = false break } else if fromsStart == '=' { hasTo = true break } else { atom := parseTerminatorAtomLiteral(fromsStart, l) if atom != nil { froms = append(froms, atom) continue } } if accept(l, "-") { fromsEnd := l.Next() if fromsEnd == ']' || fromsEnd == '=' { l.Rewind() fromsEnd = fromsStart } for i := fromsStart; i <= fromsEnd; i += 1 { froms = append(froms, walk.StringAtom(i)) } } else { froms = append(froms, walk.StringAtom(fromsStart)) } } if len(froms) == 0 { panic("Missing from part of range expression") } var tos []walk.Atom if hasTo { for { tosStart := l.Next() if tosStart == ']' { break } else { atom := parseTerminatorAtomLiteral(tosStart, l) if atom != nil { tos = append(tos, atom) continue } } if accept(l, "-") { tosEnd := l.Next() if tosEnd == ']' { l.Rewind() tosEnd = tosStart } for i := tosStart; i <= tosEnd; i += 1 { tos = append(tos, walk.StringAtom(i)) } } else { tos = append(tos, walk.StringAtom(tosStart)) } } } else { tos = froms } if len(tos) == 0 { panic("Missing to part of range expression") } for i, from := range froms { parts[from] = tos[i % len(tos)] } return parts } func parseSubex(l RuneReader, minPower int) SubexAST { var lhs SubexAST r := l.Next() switch r { case eof: return nil case '(': lhs = parseSubex(l, 0) if !accept(l, ")") { panic("Missing matching )") } case '[': rangeParts := parseRangeSubex(l) lhs = SubexASTRange {rangeParts} case ')', '|', ';', '{', '+', '$': l.Rewind() return nil case '"': replacement := parseReplacement(l) lhs = SubexASTOutput{replacement} case '.': lhs = SubexASTCopyAny{} case '@', '#', '~': lhs = SubexASTCopyAtom{atom: parseTerminatorAtomLiteral(r, l)} default: lhs = SubexASTCopyAtom{atom: walk.StringAtom(r)} } loop: for { if minPower <= 0 { next := parseSubex(l, 1) if next != nil { lhs = SubexASTConcat{lhs, next} continue loop } } r := l.Next() switch { case r == '{' && minPower <= 8: lhs = SubexASTRepeat { content: lhs, acceptable: parseRepeatRange(l), } case r == '+' && minPower <= 8: lhs = SubexASTSum {lhs} case r == '*' && minPower <= 8: lhs = SubexASTProduct {lhs} case r == '-' && minPower <= 8: lhs = SubexASTNegate {lhs} case r == '/' && minPower <= 8: lhs = SubexASTReciprocal {lhs} case r == '!' && minPower <= 8: lhs = SubexASTNot {lhs} case r == '$' && minPower <= 8: slot := l.Next() if slot == eof { panic("Missing slot character") } lhs = SubexASTStore{ match: lhs, slot: slot, } case r == '|' && minPower <= 4: rhs := parseSubex(l, 5) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} case r == ';' && minPower <= 2: rhs := parseSubex(l, 3) if rhs == nil { panic("Missing subex after ;") } lhs = SubexASTJoin{ content: lhs, delimiter: rhs, } default: l.Rewind() break loop } } return lhs } func Parse(l RuneReader) SubexAST { return parseSubex(l, 0) }