package subex

import (
	"strconv"
	"strings"

	"main/walk"
)

// RuneReader is the rune source consumed by the subex parser.
type RuneReader interface {
	// Next returns the next rune of the input.
	Next() rune
	// Rewind is called by the parser to push back the rune it most recently
	// obtained from Next.
	Rewind()
}

// accept reads the next rune from l and reports whether it is one of chars.
// When it is not, the rune is pushed back with Rewind.
func accept(l RuneReader, chars string) bool {
	if strings.ContainsRune(chars, l.Next()) {
		return true
	}
	l.Rewind()
	return false
}

// isNumericRune reports whether r may appear in a number literal: an ASCII
// digit or a decimal point.
func isNumericRune(r rune) bool {
	return '0' <= r && r <= '9' || r == '.'
}

// parseScalarLiteral parses one non-string scalar literal from l: either a
// number (a run of digits and '.') or one of the keywords null, true, false.
//
// Anything else panics, so the returned bool is always true; it is kept for
// the callers that check it.
func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) {
	r := l.Next()
	if isNumericRune(r) {
		var builder strings.Builder
		builder.WriteRune(r)
		for {
			r := l.Next()
			if !isNumericRune(r) {
				// The first non-numeric rune belongs to whatever follows.
				l.Rewind()
				break
			}
			builder.WriteRune(r)
		}
		// ParseFloat rejects malformed runs such as "." or "1.2.3".
		number, err := strconv.ParseFloat(builder.String(), 64)
		if err != nil {
			panic("Invalid number literal")
		}
		return walk.NumberScalar(number), true
	}
	switch r {
	case 'n':
		if accept(l, "u") && accept(l, "l") && accept(l, "l") {
			return walk.NullScalar{}, true
		}
	case 't':
		if accept(l, "r") && accept(l, "u") && accept(l, "e") {
			return walk.BoolScalar(true), true
		}
	case 'f':
		if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") {
			return walk.BoolScalar(false), true
		}
	}
	// Reached for an unknown leading rune or an incomplete keyword.
	panic("Invalid literal")
}

// charIsDigit reports whether c is an ASCII digit.
func charIsDigit(c rune) bool {
	return '0' <= c && c <= '9'
}

// parseInt parses a non-negative decimal integer: it reads digits 0-9 and
// stops at the first non-digit, which is pushed back. If no digit is present
// the result is 0. It panics if the value does not fit in an int instead of
// silently wrapping around.
func parseInt(l RuneReader) (output int) {
	const maxInt = int(^uint(0) >> 1)
	for {
		char := l.Next()
		if !charIsDigit(char) {
			break
		}
		digit := int(char - '0')
		// Equivalent to output*10+digit > maxInt, written so it cannot overflow.
		if output > (maxInt-digit)/10 {
			panic("Integer literal out of range")
		}
		output = output*10 + digit
	}
	l.Rewind()
	return output
}

// parseRepeatRange parses the contents of a repeat specifier, having just
// read the opening {. The contents are a comma separated list of entries
// terminated by }, where each entry is either a single integer n, giving
// ConvexRange{n, n}, or start-end, giving ConvexRange{start, end}. An omitted
// start or end is recorded as -1.
//
// It panics on any other character.
func parseRepeatRange(l RuneReader) (output []ConvexRange) {
loop:
	for {
		var start, end int
		// Peek at the next rune: a leading '-' means the start is omitted.
		char := l.Next()
		l.Rewind()
		if char == '-' {
			start = -1
		} else {
			start = parseInt(l)
		}
		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, start})
			continue loop
		case '-':
			// Peek again: the end is present only if a digit follows the '-'.
			next := l.Next()
			l.Rewind()
			if charIsDigit(next) {
				end = parseInt(l)
			} else {
				end = -1
			}
		case '}':
			output = append(output, ConvexRange{start, start})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}
		// Only reached after a start-end entry.
		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, end})
			continue loop
		case '}':
			output = append(output, ConvexRange{start, end})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}
	}
	return output
}

// parseReplacement parses output content up to and including the closing `.
// A $ followed by a slot rune becomes an OutputLoadAST for that slot. Any
// other rune becomes a rune literal when runic is true; otherwise a scalar
// literal is parsed starting at that rune.
//
// It panics if the input ends before the closing ` or directly after a $.
//
// TODO: Consider if it's worth making better use of the go type system to enforce output being all runes or all values
func parseReplacement(l RuneReader, runic bool) (output []OutputContentAST) {
	// TODO escaping
	// TODO add arrays, maps and strings
loop:
	for {
		r := l.Next()
		switch r {
		case eof:
			panic("Missing closing `")
		case '`':
			break loop
		case '$':
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			output = append(output, OutputLoadAST{slot: slot})
		default:
			if runic {
				output = append(output, OutputRuneLiteralAST{walk.StringRuneAtom(r)})
			} else {
				// Hand the rune back so parseScalarLiteral sees the whole literal.
				l.Rewind()
				scalar, ok := parseScalarLiteral(l)
				if !ok {
					panic("Invalid scalar literal")
				}
				output = append(output, OutputValueLiteralAST{scalar})
			}
		}
	}
	return output
}

// Parse the contents of a range subex [] into a map
// (currently disabled, as is the '[' case in parseSubex)
// func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD {
// 	// TODO escaping
// 	parts := make(map[walk.AtomOLD]walk.AtomOLD)
// 	var froms []walk.AtomOLD
// 	var hasTo bool
// 	for {
// 		fromsStart := l.Next()
// 		if fromsStart == ']' {
// 			hasTo = false
// 			break
// 		} else if fromsStart == '=' {
// 			hasTo = true
// 			break
// 		} else if fromsStart == '`' {
// 			literals := parseNonStringLiteral(l)
// 			froms = append(froms, literals...)
// 			continue
// 		} else if fromsStart == '"' {
// 			froms = append(froms, walk.NewAtomStringTerminal())
// 			continue
// 		}
// 		if accept(l, "-") {
// 			fromsEnd := l.Next()
// 			if fromsEnd == ']' || fromsEnd == '=' {
// 				l.Rewind()
// 				fromsEnd = fromsStart
// 			}
// 			for i := fromsStart; i <= fromsEnd; i += 1 {
// 				froms = append(froms, walk.NewAtomStringRune(i))
// 			}
// 		} else {
// 			froms = append(froms, walk.NewAtomStringRune(fromsStart))
// 		}
// 	}
// 	if len(froms) == 0 {
// 		panic("Missing from part of range expression")
// 	}
// 	var tos []walk.AtomOLD
// 	if hasTo {
// 		for {
// 			tosStart := l.Next()
// 			if tosStart == ']' {
// 				break
// 			} else if tosStart == '`' {
// 				literals := parseNonStringLiteral(l)
// 				tos = append(tos, literals...)
// 				continue
// 			} else if tosStart == '"' {
// 				tos = append(tos, walk.NewAtomStringTerminal())
// 				continue
// 			}
// 			if accept(l, "-") {
// 				tosEnd := l.Next()
// 				if tosEnd == ']' {
// 					l.Rewind()
// 					tosEnd = tosStart
// 				}
// 				for i := tosStart; i <= tosEnd; i += 1 {
// 					tos = append(tos, walk.NewAtomStringRune(i))
// 				}
// 			} else {
// 				tos = append(tos, walk.NewAtomStringRune(tosStart))
// 			}
// 		}
// 	} else {
// 		tos = froms
// 	}
// 	if len(tos) == 0 {
// 		panic("Missing to part of range expression")
// 	}
// 	for i, from := range froms {
// 		parts[from] = tos[i % len(tos)]
// 	}
// 	return parts
// }

// parseSubex parses a subex from l by precedence climbing. It first parses a
// single primary expression, then keeps extending it for as long as the
// construct that follows is permitted by minPower:
//
//   - concatenation with a following subex when minPower <= 20
//   - ';' (join with a delimiter) when minPower <= 10
//   - '|' (alternation) when minPower <= 8
//   - the postfix operators {, +, *, -, /, !, = and $ when minPower <= 4
//
// runic selects rune mode, in which most runes stand for themselves, rather
// than value mode, in which they start a scalar literal.
//
// It returns nil if the input is already at eof, and SubexASTEmpty (without
// consuming anything) if the next rune is a closing delimiter or an operator.
// It panics on malformed input.
func parseSubex(l RuneReader, minPower int, runic bool) SubexAST {
	var lhs SubexAST
	r := l.Next()
	switch r {
	case eof:
		return nil
	case '(':
		lhs = parseSubex(l, 0, runic)
		if !accept(l, ")") {
			panic("Missing matching )")
		}
	// TODO
	// case '[':
	// 	rangeParts := parseRangeSubex(l)
	// 	lhs = SubexASTRange {rangeParts}
	case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$':
		// Not the start of a primary expression: leave it for the caller.
		l.Rewind()
		return SubexASTEmpty{}
	// case '=':
	// 	replacement := parseReplacement(l)
	// 	lhs = SubexASTOutput{replacement}
	// case '^':
	// 	replacement := parseReplacement(l)
	// 	replacement = append(
	// 		[]OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}},
	// 		replacement...
	// 	)
	// 	replacement = append(
	// 		replacement,
	// 		OutputValueLiteralAST {walk.NewAtomStringTerminal()},
	// 	)
	// 	lhs = SubexASTOutput {replacement}
	case '.':
		if runic {
			lhs = SubexASTCopyAnyRune{}
		} else {
			lhs = SubexASTCopyAnyValue{}
		}
	case '?':
		lhs = SubexASTCopyBool{}
	case '%':
		lhs = SubexASTCopyNumber{}
	case ':':
		if runic {
			lhs = SubexASTCopyRune{':'}
		} else {
			if !accept(l, "[") {
				panic("Missing [ after :")
			}
			lhs = SubexASTEnterArray{parseSubex(l, 0, runic)}
			if !accept(l, "]") {
				panic("Missing matching ]")
			}
		}
	case '`':
		lhs = SubexASTOutput{parseReplacement(l, runic)}
	case '~':
		if runic {
			lhs = SubexASTCopyRune{'~'}
		} else {
			if !accept(l, "\"") {
				panic("Missing \" after ~")
			}
			// The contents of a string are always parsed in rune mode.
			lhs = SubexASTEnterString{parseSubex(l, 0, true)}
			if !accept(l, "\"") {
				panic("Missing matching \"")
			}
		}
	// TODO
	// case '_':
	// 	lhs = SubexASTCopyStringAtom{}
	// case '#':
	// 	lhs = SubexASTCopyString{}
	// case ',':
	// 	lhs = SubexASTCopyValue{}
	// case '"':
	// 	lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}
	// case '~':
	// 	literals := parseNonStringLiteral(l)
	// 	var replacement []OutputContentAST
	// 	for _, literal := range literals {
	// 		replacement = append(replacement, OutputValueLiteralAST {literal})
	// 	}
	// 	lhs = SubexASTOutput {replacement}
	default:
		if runic {
			lhs = SubexASTCopyRune{r}
		} else {
			// Hand the rune back so parseScalarLiteral sees the whole literal.
			l.Rewind()
			scalar, ok := parseScalarLiteral(l)
			if !ok {
				panic("Invalid subex")
			}
			lhs = SubexASTCopyScalar{scalar}
		}
	}

loop:
	for {
		if minPower <= 20 {
			// Try concatenation first. The operand is parsed with minPower 21
			// so that it is a bare primary and no operator attaches to it.
			next := parseSubex(l, 21, runic)
			if next != nil && (next != SubexASTEmpty{}) {
				lhs = SubexASTConcat{lhs, next}
				continue loop
			}
		}
		r := l.Next()
		switch {
		case r == '{' && minPower <= 4:
			lhs = SubexASTRepeat{
				Content:    lhs,
				Acceptable: parseRepeatRange(l),
			}
		case r == '+' && minPower <= 4:
			lhs = SubexASTSum{lhs}
		case r == '*' && minPower <= 4:
			lhs = SubexASTProduct{lhs}
		case r == '-' && minPower <= 4:
			lhs = SubexASTNegate{lhs}
		case r == '/' && minPower <= 4:
			lhs = SubexASTReciprocal{lhs}
		case r == '!' && minPower <= 4:
			lhs = SubexASTNot{lhs}
		case r == '=' && minPower <= 4:
			lhs = SubexASTEqual{lhs}
		case r == '$' && minPower <= 4:
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			// The slot '_' discards the match instead of storing it.
			if slot == '_' {
				lhs = SubexASTDiscard{lhs}
			} else {
				lhs = SubexASTStore{
					Match: lhs,
					Slot:  slot,
				}
			}
		case r == '|' && minPower <= 8:
			rhs := parseSubex(l, 9, runic)
			if rhs == nil {
				panic("Missing subex after |")
			}
			lhs = SubexASTOr{lhs, rhs}
		case r == ';' && minPower <= 10:
			rhs := parseSubex(l, 11, runic)
			if rhs == nil {
				panic("Missing subex after ;")
			}
			lhs = SubexASTJoin{
				Content:   lhs,
				Delimiter: rhs,
			}
		default:
			// Not an operator this call may consume: leave it for the caller.
			l.Rewind()
			break loop
		}
	}
	return lhs
}

// Parse parses a complete subex from l, starting in value (non-runic) mode.
// Input that is immediately at eof yields SubexASTEmpty. Parse panics on
// malformed input.
func Parse(l RuneReader) SubexAST {
	if ast := parseSubex(l, 0, false); ast != nil {
		return ast
	}
	return SubexASTEmpty{}
}