package subex

import (
	"main/walk"
	"strconv"
	"strings"
)

// RuneReader is the source of runes consumed by the subex parser.
//
// The parser calls Rewind to un-read the rune most recently returned by Next,
// and never relies on rewinding more than one rune at a time.
type RuneReader interface {
	Next() rune
	Rewind()
}

// accept consumes the next rune and reports true if it is one of chars.
// Otherwise the rune is pushed back with Rewind and false is returned.
func accept(l RuneReader, chars string) bool {
	r := l.Next()
	for _, char := range chars {
		if char == r {
			return true
		}
	}
	l.Rewind()
	return false
}

// expectBracket consumes one rune, returning ifLeft for '(' and ifRight for
// ')'. It panics on any other rune.
func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
	switch l.Next() {
	case '(':
		return ifLeft
	case ')':
		return ifRight
	default:
		panic("Expected ( or )")
	}
}

// isNumericRune reports whether r may appear in a number literal: an ASCII
// digit or a decimal point.
func isNumericRune(r rune) bool {
	return '0' <= r && r <= '9' || r == '.'
}

// Having just parsed a ` (or ~), read until the next ` or ~ and parse the
// contents into a list of non-string atoms.
//
// Recognised contents are number literals, null, true, false, the structural
// characters { } [ ], and spaces/tabs (which are skipped). Anything else,
// including running out of input, panics with "Invalid literal". A run of
// numeric runes that strconv.ParseFloat rejects panics with
// "Invalid number literal".
func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) {
	for {
		r := l.Next()
		if isNumericRune(r) {
			// Gather the maximal run of digits and dots, then let strconv
			// decide whether it is a valid number.
			var builder strings.Builder
			builder.WriteRune(r)
			for {
				next := l.Next()
				if !isNumericRune(next) {
					l.Rewind()
					break
				}
				builder.WriteRune(next)
			}
			number, err := strconv.ParseFloat(builder.String(), 64)
			if err != nil {
				panic("Invalid number literal")
			}
			literals = append(literals, walk.ValueNumber(number))
			continue
		}
		switch r {
		case '`', '~':
			return literals
		case ' ', '\t':
			continue
		case 'n':
			if accept(l, "u") && accept(l, "l") && accept(l, "l") {
				literals = append(literals, walk.ValueNull{})
			} else {
				panic("Invalid literal")
			}
		case 't':
			if accept(l, "r") && accept(l, "u") && accept(l, "e") {
				literals = append(literals, walk.ValueBool(true))
			} else {
				panic("Invalid literal")
			}
		case 'f':
			if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") {
				literals = append(literals, walk.ValueBool(false))
			} else {
				panic("Invalid literal")
			}
		case '{':
			literals = append(literals, walk.MapBegin)
		case '}':
			literals = append(literals, walk.MapEnd)
		case '[':
			literals = append(literals, walk.ArrayBegin)
		case ']':
			literals = append(literals, walk.ArrayEnd)
		default:
			panic("Invalid literal")
		}
	}
}

// charIsDigit reports whether c is an ASCII digit 0-9.
func charIsDigit(c rune) bool {
	return '0' <= c && c <= '9'
}

// Parse a positive integer, reads digits 0-9 and stops at the first non-digit.
// The non-digit is left unread. If no digits are present the result is 0.
func parseInt(l RuneReader) (output int) {
	for {
		char := l.Next()
		if !charIsDigit(char) {
			break
		}
		output = output*10 + int(char-'0')
	}
	l.Rewind()
	return output
}

// Having just read {, read in and parse the range contents.
//
// The contents are a comma separated list terminated by }. Each entry is
// either a single number n, giving ConvexRange{n, n}, or start-end, giving
// ConvexRange{start, end}. An omitted start or end is recorded as -1.
// Any unexpected rune panics.
func parseRepeatRange(l RuneReader) (output []ConvexRange) {
loop:
	for {
		var start, end int

		// Peek: a leading '-' means the start bound was omitted.
		char := l.Next()
		l.Rewind()
		if char == '-' {
			start = -1
		} else {
			start = parseInt(l)
		}

		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, start})
			continue loop
		case '-':
			// Peek: a digit after '-' starts the end bound, anything else
			// means it was omitted.
			next := l.Next()
			l.Rewind()
			if charIsDigit(next) {
				end = parseInt(l)
			} else {
				end = -1
			}
		case '}':
			output = append(output, ConvexRange{start, start})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}

		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, end})
			continue loop
		case '}':
			output = append(output, ConvexRange{start, end})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}
	}
	return output
}

// parseReplacement reads output content up to and including the closing '='.
//
// Within the replacement, $x loads slot x, a ` introduces non-string literals
// (see parseNonStringLiteral), " emits a string terminal, and every other
// rune is emitted as a string atom. Reaching the end of input before the
// closing '=' panics.
func parseReplacement(l RuneReader) (output []OutputContent) {
	// TODO escaping
loop:
	for {
		r := l.Next()
		switch r {
		case eof:
			// The replacement is delimited by '=', so that is what is missing.
			panic("Missing closing =")
		case '=':
			break loop
		case '$':
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			output = append(output, OutputLoad{slot: slot})
		case '`':
			for _, literal := range parseNonStringLiteral(l) {
				output = append(output, OutputAtomLiteral{literal})
			}
		case '"':
			output = append(output, OutputAtomLiteral{walk.StringTerminal{}})
		default:
			output = append(output, OutputAtomLiteral{atom: walk.StringAtom(r)})
		}
	}
	return output
}

// Parse the contents of a range subex [] into a map.
//
// The syntax is [froms] or [froms=tos]. Each side is a sequence of single
// runes, rune ranges a-z, " (a string terminal) and `-delimited non-string
// literals. Without a "to" side every "from" atom maps to itself; with one,
// the i-th "from" atom maps to the "to" atom at index i modulo len(tos).
//
// Panics if either side is empty or if the input ends before the closing ].
func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom {
	// TODO escaping
	parts := make(map[walk.Atom]walk.Atom)
	var froms []walk.Atom
	var hasTo bool

fromLoop:
	for {
		fromsStart := l.Next()
		switch fromsStart {
		case eof:
			// Without this check an unterminated range would loop forever,
			// appending an atom on every iteration.
			panic("Missing closing ]")
		case ']':
			hasTo = false
			break fromLoop
		case '=':
			hasTo = true
			break fromLoop
		case '`':
			froms = append(froms, parseNonStringLiteral(l)...)
			continue
		case '"':
			froms = append(froms, walk.StringTerminal{})
			continue
		}
		if !accept(l, "-") {
			froms = append(froms, walk.StringAtom(fromsStart))
			continue
		}
		fromsEnd := l.Next()
		if fromsEnd == eof {
			panic("Missing closing ]")
		}
		if fromsEnd == ']' || fromsEnd == '=' {
			// A dangling '-' before the terminator: treat it as a
			// single-rune range and let the terminator be read next time.
			l.Rewind()
			fromsEnd = fromsStart
		}
		for i := fromsStart; i <= fromsEnd; i += 1 {
			froms = append(froms, walk.StringAtom(i))
		}
	}
	if len(froms) == 0 {
		panic("Missing from part of range expression")
	}

	var tos []walk.Atom
	if hasTo {
	toLoop:
		for {
			tosStart := l.Next()
			switch tosStart {
			case eof:
				panic("Missing closing ]")
			case ']':
				break toLoop
			case '`':
				tos = append(tos, parseNonStringLiteral(l)...)
				continue
			case '"':
				tos = append(tos, walk.StringTerminal{})
				continue
			}
			if !accept(l, "-") {
				tos = append(tos, walk.StringAtom(tosStart))
				continue
			}
			tosEnd := l.Next()
			if tosEnd == eof {
				panic("Missing closing ]")
			}
			if tosEnd == ']' {
				l.Rewind()
				tosEnd = tosStart
			}
			for i := tosStart; i <= tosEnd; i += 1 {
				tos = append(tos, walk.StringAtom(i))
			}
		}
	} else {
		tos = froms
	}
	if len(tos) == 0 {
		panic("Missing to part of range expression")
	}

	for i, from := range froms {
		parts[from] = tos[i%len(tos)]
	}
	return parts
}

// parseSubex parses one subex whose operators all have binding power of at
// least minPower, returning nil if the input does not start a subex at the
// current position (end of input, or a rune that can only continue or close
// an enclosing expression, which is left unread).
//
// Binding powers as used below: concatenation is only attempted when
// minPower <= 0 (its right operand is parsed with minPower 1); ';' requires
// minPower <= 2 (right operand parsed with 3); '|' requires minPower <= 4
// (right operand parsed with 5); the postfix operators { + * - / ! $ require
// minPower <= 8.
func parseSubex(l RuneReader, minPower int) SubexAST {
	var lhs SubexAST
	r := l.Next()
	switch r {
	case eof:
		return nil
	case '(':
		lhs = parseSubex(l, 0)
		if !accept(l, ")") {
			panic("Missing matching )")
		}
		if lhs == nil {
			// An empty group "()". Leaving lhs nil would make this call
			// indistinguishable from "no subex here" for the caller, and
			// would embed a nil node in the AST.
			lhs = SubexASTEmpty{}
		}
	case '[':
		rangeParts := parseRangeSubex(l)
		lhs = SubexASTRange{rangeParts}
	case ')', '|', ';', '{', '+', '-', '*', '/', '!', '$':
		l.Rewind()
		return nil
	case '=':
		replacement := parseReplacement(l)
		lhs = SubexASTOutput{replacement}
	case '.':
		lhs = SubexASTCopyAny{}
	case '?':
		lhs = SubexASTCopyBool{}
	case '%':
		lhs = SubexASTCopyNumber{}
	case '_':
		lhs = SubexASTCopyStringAtom{}
	case '#':
		lhs = SubexASTCopyString{}
	case ',':
		lhs = SubexASTCopyValue{}
	case '"':
		lhs = SubexASTCopyAtom{walk.StringTerminal{}}
	case '`':
		// Each literal is matched and copied in sequence.
		literals := parseNonStringLiteral(l)
		lhs = SubexASTEmpty{}
		for _, literal := range literals {
			lhs = SubexASTConcat{lhs, SubexASTCopyAtom{literal}}
		}
	case '~':
		// The literals are emitted as output rather than matched.
		literals := parseNonStringLiteral(l)
		var replacement []OutputContent
		for _, literal := range literals {
			replacement = append(replacement, OutputAtomLiteral{literal})
		}
		lhs = SubexASTOutput{replacement}
	default:
		lhs = SubexASTCopyAtom{Atom: walk.StringAtom(r)}
	}

loop:
	for {
		if minPower <= 0 {
			// Concatenation: try to parse a following term first.
			next := parseSubex(l, 1)
			if next != nil {
				lhs = SubexASTConcat{lhs, next}
				continue loop
			}
		}
		r := l.Next()
		switch {
		case r == '{' && minPower <= 8:
			lhs = SubexASTRepeat{
				Content:    lhs,
				Acceptable: parseRepeatRange(l),
			}
		case r == '+' && minPower <= 8:
			lhs = SubexASTSum{lhs}
		case r == '*' && minPower <= 8:
			lhs = SubexASTProduct{lhs}
		case r == '-' && minPower <= 8:
			lhs = SubexASTNegate{lhs}
		case r == '/' && minPower <= 8:
			lhs = SubexASTReciprocal{lhs}
		case r == '!' && minPower <= 8:
			lhs = SubexASTNot{lhs}
		case r == '$' && minPower <= 8:
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			lhs = SubexASTStore{
				Match: lhs,
				Slot:  slot,
			}
		case r == '|' && minPower <= 4:
			rhs := parseSubex(l, 5)
			if rhs == nil {
				panic("Missing subex after |")
			}
			lhs = SubexASTOr{lhs, rhs}
		case r == ';' && minPower <= 2:
			rhs := parseSubex(l, 3)
			if rhs == nil {
				panic("Missing subex after ;")
			}
			lhs = SubexASTJoin{
				Content:   lhs,
				Delimiter: rhs,
			}
		default:
			// Not an operator we may consume at this binding power; leave it
			// for the caller.
			l.Rewind()
			break loop
		}
	}
	return lhs
}

// Parse reads a complete subex from l and returns its AST. Input that yields
// no subex produces SubexASTEmpty{}. Syntax errors cause a panic.
func Parse(l RuneReader) SubexAST {
	ast := parseSubex(l, 0)
	if ast == nil {
		return SubexASTEmpty{}
	}
	return ast
}