package subex

import (
	"strconv"
	"strings"

	"main/walk"
)

// Type identifies which kind of element a subex reads or writes. parseSubex
// threads an input and an output Type through the recursion; the '~(' form
// parses its contents with RuneType and the '@(' form with ValueType.
type Type int

const (
	// ValueType is the Type used for the top-level subex and inside @( ... )@.
	ValueType Type = iota
	// RuneType is the Type used inside ~( ... )~.
	RuneType
)

// RuneReader is the input source the parser consumes.
//
// Next returns the next rune. The parser compares the result against eof
// (declared elsewhere in this package) to detect the end of input. Rewind is
// only ever called by this file directly after a Next, to un-read that rune.
type RuneReader interface {
	Next() rune
	Rewind()
}

// accept reads one rune and reports whether it is one of chars. When it is
// not, the rune is pushed back with Rewind so the caller can read it again.
func accept(l RuneReader, chars string) bool {
	if strings.ContainsRune(chars, l.Next()) {
		return true
	}
	l.Rewind()
	return false
}

// isNumericRune reports whether r may appear in a number literal: an ASCII
// digit or a '.'.
func isNumericRune(r rune) bool {
	return '0' <= r && r <= '9' || r == '.'
}

// parseScalarLiteral parses a single scalar literal from l: a number made of
// digits and '.', or one of the keywords null, true and false.
//
// The boolean result is always true when the function returns; every malformed
// input panics instead ("Invalid number literal" for a run of numeric runes
// strconv.ParseFloat rejects, "Invalid literal" otherwise).
func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) {
	r := l.Next()
	if isNumericRune(r) {
		var builder strings.Builder
		builder.WriteRune(r)
		for {
			r := l.Next()
			if !isNumericRune(r) {
				// The terminating rune belongs to whatever follows the number.
				l.Rewind()
				break
			}
			builder.WriteRune(r)
		}
		number, err := strconv.ParseFloat(builder.String(), 64)
		if err != nil {
			panic("Invalid number literal")
		}
		return walk.NumberValue(number), true
	}

	// Keyword literals: the first rune selects the keyword, the remaining
	// runes must then follow exactly.
	var (
		rest  string
		value walk.Scalar
	)
	switch r {
	case 'n':
		rest, value = "ull", walk.NullValue{}
	case 't':
		rest, value = "rue", walk.BoolValue(true)
	case 'f':
		rest, value = "alse", walk.BoolValue(false)
	default:
		panic("Invalid literal")
	}
	for _, want := range rest {
		if !accept(l, string(want)) {
			panic("Invalid literal")
		}
	}
	return value, true
}

// charIsDigit reports whether c is an ASCII digit.
func charIsDigit(c rune) bool {
	return '0' <= c && c <= '9'
}

// Parse a positive integer, reads digits 0-9 and stops at the first non-digit.
// The non-digit is pushed back with Rewind. If no digit is read the result is 0.
func parseInt(l RuneReader) (output int) {
	for {
		char := l.Next()
		if !charIsDigit(char) {
			break
		}
		output = output*10 + int(char-'0')
	}
	l.Rewind()
	return output
}

// Having just read {, read in and parse the range contents.
//
// The contents are a comma separated list terminated by '}'. Each item is
// either a single integer n, recorded as ConvexRange{n, n}, or a pair
// "start-end". A missing start (the item begins with '-') is recorded as -1,
// and a missing end (the '-' is not followed by a digit) is recorded as -1.
// Any other character panics.
func parseRepeatRange(l RuneReader) (output []ConvexRange) {
loop:
	for {
		var start, end int

		// Peek at the first rune of the item to detect a missing start.
		char := l.Next()
		l.Rewind()
		if char == '-' {
			start = -1
		} else {
			start = parseInt(l)
		}

		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, start})
			continue loop
		case '-':
			// Peek again: a digit means an explicit end follows.
			next := l.Next()
			l.Rewind()
			if charIsDigit(next) {
				end = parseInt(l)
			} else {
				end = -1
			}
		case '}':
			output = append(output, ConvexRange{start, start})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}

		switch l.Next() {
		case ',':
			output = append(output, ConvexRange{start, end})
			continue loop
		case '}':
			output = append(output, ConvexRange{start, end})
			break loop
		default:
			panic("Invalid character in repeat specifier")
		}
	}
	return output
}

// parseValueReplacement parses the contents of a backtick-delimited value
// replacement, up to and including the closing backtick. Spaces are skipped,
// "$x" becomes a load of slot x, and anything else is parsed as a scalar
// literal. Panics if the input ends before the closing backtick.
func parseValueReplacement(l RuneReader) (output []OutputValueAST) {
	// TODO escaping
	// TODO add arrays, maps and strings
loop:
	for {
		r := l.Next()
		switch r {
		case eof:
			panic("Missing closing `")
		case ' ':
		case '`':
			break loop
		case '$':
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			output = append(output, OutputValueLoadAST{slot: slot})
		default:
			// Un-read the rune so parseScalarLiteral sees the whole literal.
			l.Rewind()
			scalar, ok := parseScalarLiteral(l)
			if !ok {
				panic("Invalid scalar literal")
			}
			output = append(output, OutputValueLiteralAST{scalar})
		}
	}
	return output
}

// parseRuneReplacement parses the contents of a backtick-delimited rune
// replacement, up to and including the closing backtick. "$x" becomes a load
// of slot x and every other rune, spaces included, becomes a literal. Panics
// if the input ends before the closing backtick.
func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) {
	// TODO escaping
	// TODO add arrays, maps and strings
loop:
	for {
		r := l.Next()
		switch r {
		case eof:
			panic("Missing closing `")
		case '`':
			break loop
		case '$':
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			output = append(output, OutputRuneLoadAST{slot: slot})
		default:
			output = append(output, OutputRuneLiteralAST{r})
		}
	}
	return output
}

// Parse the contents of a range subex [] into a map
// func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD {
// 	// TODO escaping
// 	parts := make(map[walk.AtomOLD]walk.AtomOLD)
// 	var froms []walk.AtomOLD
// 	var hasTo bool
// 	for {
// 		fromsStart := l.Next()
// 		if fromsStart == ']' {
// 			hasTo = false
// 			break
// 		} else if fromsStart == '=' {
// 			hasTo = true
// 			break
// 		} else if fromsStart == '`' {
// 			literals := parseNonStringLiteral(l)
// 			froms = append(froms, literals...)
// 			continue
// 		} else if fromsStart == '"' {
// 			froms = append(froms, walk.NewAtomStringTerminal())
// 			continue
// 		}
// 		if accept(l, "-") {
// 			fromsEnd := l.Next()
// 			if fromsEnd == ']' || fromsEnd == '=' {
// 				l.Rewind()
// 				fromsEnd = fromsStart
// 			}
// 			for i := fromsStart; i <= fromsEnd; i += 1 {
// 				froms = append(froms, walk.NewAtomStringRune(i))
// 			}
// 		} else {
// 			froms = append(froms, walk.NewAtomStringRune(fromsStart))
// 		}
// 	}
// 	if len(froms) == 0 {
// 		panic("Missing from part of range expression")
// 	}
//
// 	var tos []walk.AtomOLD
// 	if hasTo {
// 		for {
// 			tosStart := l.Next()
// 			if tosStart == ']' {
// 				break
// 			} else if tosStart == '`' {
// 				literals := parseNonStringLiteral(l)
// 				tos = append(tos, literals...)
// 				continue
// 			} else if tosStart == '"' {
// 				tos = append(tos, walk.NewAtomStringTerminal())
// 				continue
// 			}
// 			if accept(l, "-") {
// 				tosEnd := l.Next()
// 				if tosEnd == ']' {
// 					l.Rewind()
// 					tosEnd = tosStart
// 				}
// 				for i := tosStart; i <= tosEnd; i += 1 {
// 					tos = append(tos, walk.NewAtomStringRune(i))
// 				}
// 			} else {
// 				tos = append(tos, walk.NewAtomStringRune(tosStart))
// 			}
// 		}
// 	} else {
// 		tos = froms
// 	}
// 	if len(tos) == 0 {
// 		panic("Missing to part of range expression")
// 	}
//
// 	for i, from := range froms {
// 		parts[from] = tos[i % len(tos)]
// 	}
// 	return parts
// }

// parseSubex parses one subex from l by precedence climbing.
//
// minPower is the minimum binding power an operator needs in order to be
// absorbed into the expression being built:
//   - concatenation is attempted while minPower <= 20 (operand parsed at 21),
//   - the postfix operators '{', '+', '*', '-', '!' and '$' need minPower <= 4,
//   - alternation '|' needs minPower <= 8 (right operand parsed at 9).
//
// inType and outType are passed down to nested subexes; copy-style atoms
// require them to be equal and panic otherwise.
//
// Returns nil when the first rune read is eof, and SubexASTEmpty{} (after
// un-reading the rune) when the first rune is an operator or closing
// delimiter that cannot start a subex. Malformed input panics.
func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST {
	var lhs SubexAST
	r := l.Next()
	switch r {
	case eof:
		return nil
	case '(':
		lhs = parseSubex(l, 0, inType, outType)
		if !accept(l, ")") {
			panic("Missing matching )")
		}
	case '~':
		// ~( ... )~ : contents are parsed with rune input and output.
		if !accept(l, "(") {
			panic("Missing ( after ~")
		}
		lhs = parseSubex(l, 0, RuneType, RuneType)
		if !accept(l, ")") {
			panic("Missing matching )")
		}
		if !accept(l, "~") {
			panic("Missing matching ~")
		}
		lhs = SubexASTEnterString{lhs}
	case '@':
		// @( ... )@ : contents are parsed with value input and output.
		if !accept(l, "(") {
			panic("Missing ( after @")
		}
		lhs = parseSubex(l, 0, ValueType, ValueType)
		if !accept(l, ")") {
			panic("Missing matching )")
		}
		if !accept(l, "@") {
			panic("Missing matching @")
		}
		lhs = SubexASTEnterArray{lhs}
	// TODO
	// case '[':
	// 	rangeParts := parseRangeSubex(l)
	// 	lhs = SubexASTRange {rangeParts}
	case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$':
		// Not the start of a subex: leave the rune for the caller.
		l.Rewind()
		return SubexASTEmpty{}
	// case '=':
	// 	replacement := parseReplacement(l)
	// 	lhs = SubexASTOutput{replacement}
	// case '^':
	// 	replacement := parseReplacement(l)
	// 	replacement = append(
	// 		[]OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}},
	// 		replacement...
	// 	)
	// 	replacement = append(
	// 		replacement,
	// 		OutputValueLiteralAST {walk.NewAtomStringTerminal()},
	// 	)
	// 	lhs = SubexASTOutput {replacement}
	case '.':
		if inType != outType {
			panic("Copying value changes type!")
		}
		if inType == RuneType {
			lhs = SubexASTCopyAnyRune{}
		} else {
			lhs = SubexASTCopyAnyValue{}
		}
	case '?':
		lhs = SubexASTCopyBool{}
	case '%':
		lhs = SubexASTCopyNumber{}
	case '`':
		lhs = SubexASTOutputValues{parseValueReplacement(l)}
	// TODO
	// case '_':
	// 	lhs = SubexASTCopyStringAtom{}
	// case '#':
	// 	lhs = SubexASTCopyString{}
	// case ',':
	// 	lhs = SubexASTCopyValue{}
	// case '"':
	// 	lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}
	default:
		if inType != outType {
			panic("inType and outType don't match in copy")
		}
		if inType == RuneType {
			lhs = SubexASTCopyRune{r}
		} else {
			// Un-read the rune so parseScalarLiteral sees the whole literal.
			l.Rewind()
			scalar, ok := parseScalarLiteral(l)
			if !ok {
				panic("Invalid subex")
			}
			lhs = SubexASTCopyScalar{scalar}
		}
	}

loop:
	for {
		if minPower <= 20 {
			// Concatenation has no operator rune: try to parse another
			// operand and join it on if one is found.
			next := parseSubex(l, 21, inType, outType)
			if next != nil && (next != SubexASTEmpty{}) {
				lhs = SubexASTConcat{lhs, next}
				continue loop
			}
		}
		r := l.Next()
		switch {
		case r == '{' && minPower <= 4:
			lhs = SubexASTRepeat{
				Content:    lhs,
				Acceptable: parseRepeatRange(l),
			}
		case r == '+' && minPower <= 4:
			lhs = SubexASTSum{lhs}
		case r == '*' && minPower <= 4:
			lhs = SubexASTProduct{lhs}
		case r == '-' && minPower <= 4:
			lhs = SubexASTNegate{lhs}
		// case r == '/' && minPower <= 4:
		// 	lhs = SubexASTReciprocal {lhs}
		case r == '!' && minPower <= 4:
			lhs = SubexASTNot{lhs}
		// case r == '=' && minPower <= 4:
		// 	lhs = SubexASTEqual {lhs}
		case r == '$' && minPower <= 4:
			slot := l.Next()
			if slot == eof {
				panic("Missing slot character")
			}
			if slot == '_' {
				lhs = SubexASTDiscard{lhs}
			} else {
				lhs = SubexASTStoreValues{
					Match: lhs,
					Slot:  slot,
				}
			}
		case r == '|' && minPower <= 8:
			rhs := parseSubex(l, 9, inType, outType)
			if rhs == nil {
				panic("Missing subex after |")
			}
			lhs = SubexASTOr{lhs, rhs}
		/*case r == ';' && minPower <= 10:
			rhs := parseSubex(l, 11, inType, outType)
			if rhs == nil {
				panic("Missing subex after ;")
			}
			lhs = SubexASTJoin {
				Content: lhs,
				Delimiter: rhs,
			}*/
		default:
			// Either not an operator or one that binds too loosely for this
			// level: leave it for an enclosing call.
			l.Rewind()
			break loop
		}
	}
	return lhs
}

// Parse reads a complete subex from l, with ValueType as both the input and
// output type, and returns its AST. Input on which parseSubex returns nil
// (eof as the first rune) yields SubexASTEmpty{}. Malformed input panics.
func Parse(l RuneReader) SubexAST {
	ast := parseSubex(l, 0, ValueType, ValueType)
	if ast == nil {
		return SubexASTEmpty{}
	}
	return ast
}