package subex import ( "main/walk" "strconv" "strings" ) type Type int const ( AnyType Type = iota ValueType RuneType ) func resolveTypes(t1 Type, t2 Type) Type { if t1 == AnyType { return t2 } if t2 == AnyType { return t1 } if t1 == t2 { return t1 } panic("Types don't match in parser") } type Structure int const ( NoneStructure Structure = iota StringStructure ArrayStructure ArrayValuesStructure MapStructure ) func (s Structure) String() string { switch s { case NoneStructure: return "-" case StringStructure: return "~" case ArrayStructure: return "@" case ArrayValuesStructure: return ":" case MapStructure: return "#" default: panic("Invalid structure") } } type DestructureMethod int const ( Normal DestructureMethod = iota Iterate ) type RuneReader interface { Next() rune Rewind() RewindRune(r rune) } func accept(l RuneReader, chars string) bool { r := l.Next() for _, char := range chars { if char == r { return true } } l.Rewind() return false } func isNumericRune(r rune) bool { return '0' <= r && r <= '9' || r == '.' } // Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { r := l.Next() if isNumericRune(r) { var builder strings.Builder builder.WriteRune(r) for { r := l.Next() if !isNumericRune(r) { l.Rewind() break } builder.WriteRune(r) } numberString := builder.String() number, err := strconv.ParseFloat(numberString, 64) if err != nil { panic("Invalid number literal") } return walk.NumberValue(number), true } switch r { case 'n': if accept(l, "u") && accept(l, "l") && accept(l, "l") { return walk.NullValue{}, true } else { panic("Invalid literal") } case 't': if accept(l, "r") && accept(l, "u") && accept(l, "e") { return walk.BoolValue(true), true } else { panic("Invalid literal") } case 'f': if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") { return walk.BoolValue(false), true } else { panic("Invalid literal") } default: panic("Invalid literal") } } func charIsDigit(c rune) bool { return '0' <= c && c <= '9' } // Parse a positive integer, reads digits 0-9 and stops at the first non-digit func parseInt(l RuneReader) (output int) { for { char := l.Next() if charIsDigit(char) { output = output * 10 + int(char - '0') } else { break } } l.Rewind() return output } func parseNumberFilter(l RuneReader, minPower int) SubexASTNumberFilter { var lhs SubexASTNumberFilter r := l.Next() switch r { case eof: panic("Missing matching ]") case 'c': count := parseInt(l) lhs = SubexASTNumberFilterCount {count} case 'p': var subset NumberSubset if l.Next() == 'i' { subset = NumberSubsetPositiveInteger } else { subset = NumberSubsetPositiveReal l.Rewind() } lhs = SubexASTNumberFilterSubset { subset: subset, } default: if !isNumericRune(r) { panic("Invalid character in numeric []") } var builder strings.Builder builder.WriteRune(r) for { r := l.Next() if !isNumericRune(r) { l.Rewind() break } builder.WriteRune(r) } numberString := builder.String() number, err := strconv.ParseFloat(numberString, 64) if err != nil { panic("Invalid number literal") } lhs = SubexASTNumberFilterLiteral {number} } loop: for { r := l.Next() switch { case r == '+' && minPower <= 10: lhs = SubexASTNumberFilterAdd { lhs: lhs, rhs: parseNumberFilter(l, 11), } case r == '*' && minPower <= 20: lhs = SubexASTNumberFilterMultiply { lhs: lhs, rhs: parseNumberFilter(l, 21), } default: l.Rewind() break loop } } return lhs } // Having just read {, read in and parse the range contents func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { var start, end int char := l.Next() l.Rewind() if char == '-' { start = -1 } else { start = parseInt(l) } switch l.Next() { case ',': output = append(output, ConvexRange{start, start}) continue loop case '-': char := l.Next() if charIsDigit(char) { l.Rewind() end = parseInt(l) } else { l.Rewind() end = -1 } case '}': output = append(output, ConvexRange{start, start}) break loop default: panic("Invalid character in repeat specifier") } switch l.Next() { case ',': output = append(output, ConvexRange{start, end}) continue loop case '}': output = append(output, ConvexRange{start, end}) break loop default: panic("Invalid character in repeat specifier") } } return output } func parseValueReplacementOLD(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings loop: for { r := l.Next() switch r { case eof: panic("Missing closing `") case ' ': case end: break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } output = SubexASTConcat { First: output, Second: SubexASTOutputValueLoad { slot: slot, }, } // TODO: destructures case '#': if !accept(l, "(") { panic("Missing ( after #") } output = SubexASTConcat { First: output, Second: SubexASTDestructure { Destructure: NoneStructure, Structure: MapStructure, Content: parseValueReplacementOLD(l, ')'), }, } if !accept(l, "#") { panic("Missing # after )") } case '"': output = SubexASTConcat { First: output, Second: SubexASTDestructure { Destructure: NoneStructure, Structure: StringStructure, Content: parseRuneReplacement(l, '"'), }, } default: l.Rewind() scalar, ok := parseScalarLiteral(l) if !ok { panic("Invalid scalar literal") } output = SubexASTConcat { First: output, Second: SubexASTOutputValueLiteral { literal: scalar, }, } } } return output } func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping loop: for { r := l.Next() switch r { case eof: panic("Missing closing `") case end: break loop case '<': slot := l.Next() if slot == eof { panic("Missing slot character") } output = SubexASTConcat { First: output, Second: SubexASTOutputRuneLoad { slot: slot, }, } default: output = SubexASTConcat { First: output, Second: SubexASTOutputRuneLiteral { literal: r, }, } } } return output } func parseValueReplacement(l RuneReader, end rune, minPower int) SubexAST { // TODO: escaping probably var lhs SubexAST r := l.Next() switch r { case eof: panic("Missing closing `") case end: l.Rewind() return SubexASTEmpty{} case 'n': if !accept(l, "u") { panic("Expected null") } if !accept(l, "l") { panic("Expected null") } if !accept(l, "l") { panic("Expected null") } lhs = SubexASTOutputValueLiteral { literal: walk.NullValue{}, } // TODO: everything except numbers, strings, maps, and null case '"': lhs = SubexASTDestructure { Destructure: NoneStructure, Structure: StringStructure, Content: parseRuneReplacement(l, '"'), } case '#': if !accept(l, "(") { panic("Missing ( after #") } lhs = SubexASTDestructure { Destructure: NoneStructure, Structure: MapStructure, Content: parseValueReplacement(l, ')', 0), } if !accept(l, ")") { panic("Missing closing )") } if !accept(l, "#") { panic("Missing # after )") } case '<': slot := l.Next() if slot == eof { panic("Missing slot character") } lhs = SubexASTOutputValueLoad { slot: slot, } default: if !isNumericRune(r) { panic("Invalid character in numeric") } var builder strings.Builder builder.WriteRune(r) for { r := l.Next() if !isNumericRune(r) { l.Rewind() break } builder.WriteRune(r) } numberString := builder.String() number, err := strconv.ParseFloat(numberString, 64) if err != nil { panic("Invalid number literal") } lhs = SubexASTOutputValueLiteral { literal: walk.NumberValue(number), } } loop: for { r := l.Next() switch { case r == eof: panic("Missing closing `") case r == '+' && minPower <= 10: lhs = SubexASTBinop { op: binopAdd, lhs: lhs, rhs: parseValueReplacement(l, end, 11), } case r == '*' && minPower <= 20: lhs = SubexASTBinop { op: binopMultiply, lhs: lhs, rhs: parseValueReplacement(l, end, 21), } case r == '/' && minPower <= 20: lhs = SubexASTBinop { op: binopDivide, lhs: lhs, rhs: parseValueReplacement(l, end, 21), } case r == end: l.Rewind() break loop case minPower <= 2: l.Rewind() lhs = SubexASTConcat { First: lhs, Second: parseValueReplacement(l, end, 3), } default: l.Rewind() break loop } } return lhs } // Parse the contents of a range subex [] into a map // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { // // TODO escaping // parts := make(map[walk.AtomOLD]walk.AtomOLD) // var froms []walk.AtomOLD // var hasTo bool // for { // fromsStart := l.Next() // if fromsStart == ']' { // hasTo = false // break // } else if fromsStart == '=' { // hasTo = true // break // } else if fromsStart == '`' { // literals := parseNonStringLiteral(l) // froms = append(froms, literals...) // continue // } else if fromsStart == '"' { // froms = append(froms, walk.NewAtomStringTerminal()) // continue // } // if accept(l, "-") { // fromsEnd := l.Next() // if fromsEnd == ']' || fromsEnd == '=' { // l.Rewind() // fromsEnd = fromsStart // } // for i := fromsStart; i <= fromsEnd; i += 1 { // froms = append(froms, walk.NewAtomStringRune(i)) // } // } else { // froms = append(froms, walk.NewAtomStringRune(fromsStart)) // } // } // if len(froms) == 0 { // panic("Missing from part of range expression") // } // var tos []walk.AtomOLD // if hasTo { // for { // tosStart := l.Next() // if tosStart == ']' { // break // } else if tosStart == '`' { // literals := parseNonStringLiteral(l) // tos = append(tos, literals...) // continue // } else if tosStart == '"' { // tos = append(tos, walk.NewAtomStringTerminal()) // continue // } // if accept(l, "-") { // tosEnd := l.Next() // if tosEnd == ']' { // l.Rewind() // tosEnd = tosStart // } // for i := tosStart; i <= tosEnd; i += 1 { // tos = append(tos, walk.NewAtomStringRune(i)) // } // } else { // tos = append(tos, walk.NewAtomStringRune(tosStart)) // } // } // } else { // tos = froms // } // if len(tos) == 0 { // panic("Missing to part of range expression") // } // for i, from := range froms { // parts[from] = tos[i % len(tos)] // } // return parts // } func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { var method rune switch l.Next() { case '(': method = ')' case '[': method = ']' default: panic("Missing ( or [ after destructure start") } var innerInType Type var expectedInType Type switch destructure { case NoneStructure: innerInType = inType expectedInType = inType case StringStructure: innerInType = RuneType expectedInType = ValueType case ArrayStructure: innerInType = ValueType expectedInType = ValueType case ArrayValuesStructure: innerInType = ValueType expectedInType = ValueType case MapStructure: innerInType = ValueType expectedInType = ValueType default: panic("Invalid structure") } resolveTypes(inType, expectedInType) lhs, innerOutType := parseSubex(l, 0, innerInType) if !accept(l, string(method)) { panic("Missing matching ) or ]") } switch method { case ')': case ']': lhs = SubexASTRepeat { Content: lhs, Acceptable: []ConvexRange{{ Start: -1, End: 0, }}, } default: panic("Invalid method") } var structure Structure var expectedInnerOutType Type r := l.Next() switch r { case '-': structure = NoneStructure expectedInnerOutType = innerOutType case '~': structure = StringStructure expectedInnerOutType = RuneType case '@': structure = ArrayStructure expectedInnerOutType = ValueType case ':': structure = ArrayValuesStructure expectedInnerOutType = ValueType case '#': structure = MapStructure expectedInnerOutType = ValueType default: panic("Missing matching destructure") } innerOutType = resolveTypes(innerOutType, expectedInnerOutType) switch structure { case NoneStructure: outType = innerOutType case StringStructure: outType = ValueType case ArrayStructure: outType = ValueType case ArrayValuesStructure: outType = ValueType case MapStructure: outType = ValueType } lhs = SubexASTDestructure { Destructure: destructure, Structure: structure, Content: lhs, } return lhs, outType } func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) { start: r := l.Next() switch r { case eof: return nil, inType case '(': lhs, outType = parseSubex(l, 0, inType) if !accept(l, ")") { panic("Missing matching )") } case '-': lhs, outType = parseDestructure(l, NoneStructure, inType) case '~': lhs, outType = parseDestructure(l, StringStructure, inType) case '@': lhs, outType = parseDestructure(l, ArrayStructure, inType) case ':': lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) case '#': lhs, outType = parseDestructure(l, MapStructure, inType) case '"': switch inType { case ValueType: var innerOutType Type lhs, innerOutType = parseSubex(l, 0, RuneType) if !accept(l, "\"") { panic("Missing matching \"") } resolveTypes(innerOutType, RuneType) lhs = SubexASTDestructure { Destructure: StringStructure, Structure: StringStructure, Content: lhs, } outType = ValueType // RuneType default: l.Rewind() return SubexASTEmpty{}, inType } case '<': slot := l.Next() switch slot { case eof: panic("Missing slot") case '>': panic("Parsing error. Tried to parse <> as a subex with nothing before it") default: switch inType { case ValueType: lhs = SubexASTOutputValueLoad { slot: slot, } case RuneType: lhs = SubexASTOutputRuneLoad { slot: slot, } default: panic("Invalid inType") } } case '[': switch inType { case ValueType: lhs = SubexASTCopyNumberFilter { filter: parseNumberFilter(l, 0), } if !accept(l, "]") { panic("Missing matching ]") } default: // TODO: other types panic("[] is only valid for values currently") } case ')', ']', '|', '{', '+', '*': l.Rewind() return SubexASTEmpty{}, inType case '.': outType = inType if inType == RuneType { lhs = SubexASTCopyAnyRune{} } else { lhs = SubexASTCopyAnyValue{} } case ',': switch inType { case ValueType: outType = inType lhs = SubexASTCopyAnySimpleValue{} case RuneType: outType = inType lhs = SubexASTCopyRune{','} default: panic("Invalid inType") } case 'r': switch inType { case ValueType: outType = inType lhs = SubexASTCopyNumberFilter { filter: SubexASTNumberFilterSubset { subset: NumberSubsetReal, }, } case RuneType: outType = inType lhs = SubexASTCopyRune {'r'} default: panic("Invalid inType") } case '?': outType = inType lhs = SubexASTCopyBool{} case '`': outType = inType switch inType { case ValueType: lhs = parseValueReplacement(l, '`', 0) if !accept(l, "`") { panic("Missing closing `") } case RuneType: lhs = parseRuneReplacement(l, '`') default: panic("Invalid inType") } case ' ': switch inType { case RuneType: outType = RuneType lhs = SubexASTCopyRune {' '} case ValueType: goto start } default: outType = inType switch inType { case RuneType: lhs = SubexASTCopyRune {r} // ValueType, NumberType case ValueType: l.Rewind() scalar, ok := parseScalarLiteral(l) if !ok { panic("Invalid subex") } lhs = SubexASTCopyScalar {scalar} } } loop: for { r := l.Next() switch { case r == eof: break loop case r == '{' && minPower <= 10: lhs = SubexASTRepeat { Content: lhs, Acceptable: parseRepeatRange(l), } case r == '+' && minPower <= 10: lhs = SubexASTRepeat { Content: lhs, Acceptable: []ConvexRange {{ Start: -1, End: 1, }}, } case r == '*' && minPower <= 10: lhs = SubexASTRepeat { Content: lhs, Acceptable: []ConvexRange {{ Start: -1, End: 0, }}, } case r == '_' && minPower <= 10: switch inType { case ValueType: lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } outType = AnyType case RuneType: // Just a concat lhs = SubexASTConcat { lhs, SubexASTCopyRune { rune: '_', }, } outType = AnyType default: panic("Invalid inType") } case r == '%' && minPower <= 10: slot := l.Next() switch slot { case eof: panic("Missing slot character") case '<', '>': panic("Invalid character after %") case '_': panic("Cannot load from _") default: switch inType { case ValueType: lhs = SubexASTConcat { First: SubexASTStoreValues { Match: lhs, Slot: slot, }, Second: SubexASTOutputValueLoad { slot: slot, }, } case RuneType: lhs = SubexASTConcat { First: SubexASTStoreRunes { Match: lhs, Slot: slot, }, Second: SubexASTOutputRuneLoad { slot: slot, }, } default: panic("Invalid inType") } } case r == '>' && minPower <= 10: slot := l.Next() switch slot { case eof: panic("Missing slot character") case '>': slot := l.Next() switch slot { case eof: panic("Missing slot character") case '_': lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } outType = AnyType default: switch inType { case ValueType: lhs = SubexASTAppendStoreValues { Match: lhs, Slot: slot, } case RuneType: lhs = SubexASTAppendStoreRunes { Match: lhs, Slot: slot, } default: panic("Invalid inType") } outType = AnyType } case '<': slot := l.Next() switch slot { case eof: panic("Missing slot character") case '_': panic("Cannot load from _ slot") default: switch inType { case ValueType: lhs = SubexASTConcat { First: SubexASTStoreValues { Match: lhs, Slot: slot, }, Second: SubexASTOutputValueLoad { slot: slot, }, } case RuneType: lhs = SubexASTConcat { First: SubexASTStoreRunes { Match: lhs, Slot: slot, }, Second: SubexASTOutputRuneLoad { slot: slot, }, } default: panic("Invalid inType") } outType = inType } case '_': lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } outType = AnyType default: switch inType { case ValueType: lhs = SubexASTStoreValues { Match: lhs, Slot: slot, } case RuneType: lhs = SubexASTStoreRunes { Match: lhs, Slot: slot, } default: panic("Invalid type") } outType = AnyType } case r == '<' && minPower <= 6: slot := l.Next() switch slot { case eof: panic("Missing slot character") case '_': panic("Cannot load from _ slot") case '>': slot := l.Next() switch slot { case eof: panic("Missing slot character") case '_': panic("Cannot load from _ slot") default: switch inType { case ValueType: lhs = SubexASTConcat { SubexASTOutputValueLoad { slot: slot, }, SubexASTStoreValues { Match: lhs, Slot: slot, }, } case RuneType: lhs = SubexASTConcat { SubexASTOutputRuneLoad { slot: slot, }, SubexASTStoreRunes { Match: lhs, Slot: slot, }, } default: panic("Invalid inType") } } default: // This is just a concat l.Rewind() l.RewindRune('<') next, outType2 := parseSubex(l, 7, inType) // TODO: next might legitimately be SubexASTEmpty, e.g. `` if next != nil && (next != SubexASTEmpty{}) { outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} continue loop } } case r == '|' && minPower <= 2: rhs, outType2 := parseSubex(l, 3, inType) outType = resolveTypes(outType, outType2) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} case minPower <= 6: l.Rewind() next, outType2 := parseSubex(l, 7, inType) // TODO: next might legitimately be SubexASTEmpty, e.g. `` if next != nil && (next != SubexASTEmpty{}) { outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} } else { break loop } default: l.Rewind() break loop } } return lhs, outType } func Parse(l RuneReader) SubexAST { ast, outType := parseSubex(l, 0, ValueType) outType = resolveTypes(outType, ValueType) if ast == nil { return SubexASTEmpty{} } return ast }