diff options
Diffstat (limited to 'subex/parse.go')
| -rw-r--r-- | subex/parse.go | 792 |
1 files changed, 647 insertions, 145 deletions
diff --git a/subex/parse.go b/subex/parse.go index b6bf2f6..01a747b 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,7 +1,6 @@ package subex import ( - "fmt" "main/walk" "strconv" "strings" @@ -55,9 +54,16 @@ func (s Structure) String() string { } } +type DestructureMethod int +const ( + Normal DestructureMethod = iota + Iterate +) + type RuneReader interface { Next() rune Rewind() + RewindRune(r rune) } func accept(l RuneReader, chars string) bool { @@ -116,7 +122,6 @@ func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { panic("Invalid literal") } default: - fmt.Printf("%c\n", r) panic("Invalid literal") } } @@ -139,6 +144,166 @@ func parseInt(l RuneReader) (output int) { return output } +// Parse a number literal in a number expression +func parseNumberLiteral(l RuneReader) NumberExprLiteral { + var builder strings.Builder + for { + r := l.Next() + if !isNumericRune(r) { + l.Rewind() + break + } + builder.WriteRune(r) + } + numberString := builder.String() + number, err := strconv.ParseFloat(numberString, 64) + if err != nil { + panic("Invalid number literal") + } + return NumberExprLiteral { + Value: number, + } +} + +// Parse a numeric expression +func parseNumberExpression(l RuneReader, minPower int) NumberExpr { + var lhs NumberExpr + switch l.Next() { + case '(': + lhs = parseNumberExpression(l, 0) + if !accept(l, ")") { + panic("Missing closing )") + } + case 'n': + lhs = NumberExprVariable{} + case '-': + lhs = NumberExprLiteral{0} + l.Rewind() + case '!': + lhs = NumberExprNot { + Right: parseNumberExpression(l, 13), + } + default: + l.Rewind() + lhs = parseNumberLiteral(l) + } + + loop: for { + r := l.Next() + switch { + case r == '|' && minPower <= 8: + lhs = NumberExprOr { + Left: lhs, + Right: parseNumberExpression(l, 9), + } + case r == '&' && minPower <= 10: + lhs = NumberExprAnd { + Left: lhs, + Right: parseNumberExpression(l, 11), + } + case r == '<' && minPower <= 20: + if accept(l, "=") { + lhs = NumberExprAtMost { + Left: lhs, + Right: parseNumberExpression(l, 21), + } + } else { + lhs = NumberExprLessThan { + Left: lhs, + Right: parseNumberExpression(l, 21), + } + } + case r == '>' && minPower <= 20: + if accept(l, "=") { + lhs = NumberExprAtLeast { + Left: lhs, + Right: parseNumberExpression(l, 21), + } + } else { + lhs = NumberExprGreaterThan { + Left: lhs, + Right: parseNumberExpression(l, 21), + } + } + case r == '=' && minPower <= 20: + lhs = NumberExprEqual { + Left: lhs, + Right: parseNumberExpression(l, 21), + } + case r == '~' && minPower <= 20: + lhs = NumberExprNot { + Right: NumberExprEqual { + Left: lhs, + Right: parseNumberExpression(l, 21), + }, + } + case r == '+' && minPower <= 30: + lhs = NumberExprAdd { + Left: lhs, + Right: parseNumberExpression(l, 31), + } + case r == '-' && minPower <= 30: + lhs = NumberExprSubtract { + Left: lhs, + Right: parseNumberExpression(l, 31), + } + case r == '*' && minPower <= 36: + lhs = NumberExprMultiply { + Left: lhs, + Right: parseNumberExpression(l, 37), + } + case r == '/' && minPower <= 36: + lhs = NumberExprDivide { + Left: lhs, + Right: parseNumberExpression(l, 37), + } + case r == '%' && minPower <= 36: + lhs = NumberExprMod { + Left: lhs, + Right: parseNumberExpression(l, 37), + } + case r == '^' && minPower <= 40: + lhs = NumberExprExponent { + Left: lhs, + Right: parseNumberExpression(l, 41), + } + default: + l.Rewind() + break loop + } + } + + return lhs +} + +// Having just read a [ in a value subex, parse the number mapping contents up +// to but not including the closing ] +func parseNumberMapping(l RuneReader) SubexAST { + numRange := parseNumberExpression(l, 0) + var numReplace []NumberExpr + if accept(l, ":") { + if !accept(l, "]") { + for { + numReplace = append( + numReplace, + parseNumberExpression(l, 0), + ) + if !accept(l, ",") { + break + } + } + } else { + l.Rewind() + } + } else { + numReplace = []NumberExpr{NumberExprVariable{}} + } + return SubexASTNumberMapping { + Range: numRange, + Replace: numReplace, + } +} + // Having just read {, read in and parse the range contents func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { @@ -183,7 +348,7 @@ func parseRepeatRange(l RuneReader) (output []ConvexRange) { return output } -func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { +func parseValueReplacementOLD(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings @@ -216,7 +381,7 @@ func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { Second: SubexASTDestructure { Destructure: NoneStructure, Structure: MapStructure, - Content: parseValueReplacement(l, ')'), + Content: parseValueReplacementOLD(l, ')'), }, } if !accept(l, "#") { @@ -258,7 +423,7 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { panic("Missing closing `") case end: break loop - case '$': + case '<': slot := l.Next() if slot == eof { panic("Missing slot character") @@ -281,6 +446,126 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { return output } +func parseValueReplacement(l RuneReader, end rune, minPower int) SubexAST { + // TODO: escaping probably + var lhs SubexAST + r := l.Next() + switch r { + case eof: + panic("Missing closing `") + case end: + l.Rewind() + return SubexASTEmpty{} + case 'n': + if !accept(l, "u") { + panic("Expected null") + } + if !accept(l, "l") { + panic("Expected null") + } + if !accept(l, "l") { + panic("Expected null") + } + lhs = SubexASTOutputValueLiteral { + literal: walk.NullValue{}, + } + // TODO: everything except numbers, strings, maps, and null + case '"': + lhs = SubexASTDestructure { + Destructure: NoneStructure, + Structure: StringStructure, + Content: parseRuneReplacement(l, '"'), + } + case '#': + if !accept(l, "(") { + panic("Missing ( after #") + } + lhs = SubexASTDestructure { + Destructure: NoneStructure, + Structure: MapStructure, + Content: parseValueReplacement(l, ')', 0), + } + if !accept(l, ")") { + panic("Missing closing )") + } + if !accept(l, "#") { + panic("Missing # after )") + } + case '<': + slot := l.Next() + if slot == eof { + panic("Missing slot character") + } + lhs = SubexASTOutputValueLoad { + slot: slot, + } + default: + if !isNumericRune(r) { + panic("Invalid character in numeric") + } + + var builder strings.Builder + builder.WriteRune(r) + for { + r := l.Next() + if !isNumericRune(r) { + l.Rewind() + break + } + builder.WriteRune(r) + } + numberString := builder.String() + number, err := strconv.ParseFloat(numberString, 64) + if err != nil { + panic("Invalid number literal") + } + + lhs = SubexASTOutputValueLiteral { + literal: walk.NumberValue(number), + } + } + + loop: for { + r := l.Next() + switch { + case r == eof: + panic("Missing closing `") + case r == '+' && minPower <= 10: + lhs = SubexASTBinop { + op: binopAdd, + lhs: lhs, + rhs: parseValueReplacement(l, end, 11), + } + case r == '*' && minPower <= 20: + lhs = SubexASTBinop { + op: binopMultiply, + lhs: lhs, + rhs: parseValueReplacement(l, end, 21), + } + case r == '/' && minPower <= 20: + lhs = SubexASTBinop { + op: binopDivide, + lhs: lhs, + rhs: parseValueReplacement(l, end, 21), + } + case r == end: + l.Rewind() + break loop + case minPower <= 2: + l.Rewind() + lhs = SubexASTConcat { + First: lhs, + Second: parseValueReplacement(l, end, 3), + } + default: + l.Rewind() + break loop + } + } + + return lhs +} + // Parse the contents of a range subex [] into a map // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { // // TODO escaping @@ -361,8 +646,14 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { // } func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { - if !accept(l, "(") { - panic("Missing ( after destructure start") + var method rune + switch l.Next() { + case '(': + method = ')' + case '[': + method = ']' + default: + panic("Missing ( or [ after destructure start") } var innerInType Type @@ -390,8 +681,22 @@ func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs Sub resolveTypes(inType, expectedInType) lhs, innerOutType := parseSubex(l, 0, innerInType) - if !accept(l, ")") { - panic("Missing matching )") + if !accept(l, string(method)) { + panic("Missing matching ) or ]") + } + + switch method { + case ')': + case ']': + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange{{ + Start: -1, + End: 0, + }}, + } + default: + panic("Invalid method") } var structure Structure @@ -445,181 +750,378 @@ func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType start: r := l.Next() switch r { - case eof: - return nil, inType - case '(': - lhs, outType = parseSubex(l, 0, inType) - if !accept(l, ")") { - panic("Missing matching )") - } - case '-': - lhs, outType = parseDestructure(l, NoneStructure, inType) - case '~': - lhs, outType = parseDestructure(l, StringStructure, inType) - case '@': - lhs, outType = parseDestructure(l, ArrayStructure, inType) - case ':': - lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) - case '#': - lhs, outType = parseDestructure(l, MapStructure, inType) - case '"': - if inType == ValueType { - var innerOutType Type - lhs, innerOutType = parseSubex(l, 0, RuneType) - if !accept(l, "\"") { - panic("Missing matching \"") - } - resolveTypes(innerOutType, RuneType) - lhs = SubexASTDestructure { - Destructure: StringStructure, - Structure: StringStructure, - Content: lhs, - } - outType = ValueType - } else { - l.Rewind() - return SubexASTEmpty{}, inType + case eof: + return nil, inType + case '(': + lhs, outType = parseSubex(l, 0, inType) + if !accept(l, ")") { + panic("Missing matching )") + } + case '-': + lhs, outType = parseDestructure(l, NoneStructure, inType) + case '~': + lhs, outType = parseDestructure(l, StringStructure, inType) + case '@': + lhs, outType = parseDestructure(l, ArrayStructure, inType) + case ':': + lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) + case '#': + lhs, outType = parseDestructure(l, MapStructure, inType) + case '"': + switch inType { + case ValueType: + var innerOutType Type + lhs, innerOutType = parseSubex(l, 0, RuneType) + if !accept(l, "\"") { + panic("Missing matching \"") } - // TODO - // case '[': - // rangeParts := parseRangeSubex(l) - // lhs = SubexASTRange {rangeParts} - case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': + resolveTypes(innerOutType, RuneType) + lhs = SubexASTDestructure { + Destructure: StringStructure, + Structure: StringStructure, + Content: lhs, + } + outType = ValueType + // RuneType + default: l.Rewind() return SubexASTEmpty{}, inType - // case '=': - // replacement := parseReplacement(l) - // lhs = SubexASTOutput{replacement} - // case '^': - // replacement := parseReplacement(l) - // replacement = append( - // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, - // replacement... - // ) - // replacement = append( - // replacement, - // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, - // ) - // lhs = SubexASTOutput {replacement} - case '.': - outType = inType - if inType == RuneType { - lhs = SubexASTCopyAnyRune{} - } else { - lhs = SubexASTCopyAnyValue{} - } - case '?': - outType = inType - lhs = SubexASTCopyBool{} - case '%': - outType = inType - lhs = SubexASTCopyNumber{} - case '`': - outType = inType + } + case '<': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot") + case '>': + panic("Parsing error. Tried to parse <> as a subex with nothing before it") + default: switch inType { case ValueType: - lhs = parseValueReplacement(l, '`') + lhs = SubexASTOutputValueLoad { + slot: slot, + } case RuneType: - lhs = parseRuneReplacement(l, '`') + lhs = SubexASTOutputRuneLoad { + slot: slot, + } default: panic("Invalid inType") } - case ' ': - if inType == RuneType { - outType = RuneType - lhs = SubexASTCopyRune {' '} - } else { - goto start + } + case '[': + switch inType { + case ValueType: + lhs = parseNumberMapping(l) + if !accept(l, "]") { + panic("Missing matching ]") } default: + // TODO: other types + panic("[] is only valid for values currently") + } + case ')', ']', '|', '{', '+', '*': + l.Rewind() + return SubexASTEmpty{}, inType + case '.': + outType = inType + if inType == RuneType { + lhs = SubexASTCopyAnyRune{} + } else { + lhs = SubexASTCopyAnyValue{} + } + case ',': + switch inType { + case ValueType: outType = inType - if inType == RuneType { - lhs = SubexASTCopyRune {r} - } else { - l.Rewind() - scalar, ok := parseScalarLiteral(l) - if !ok { - panic("Invalid subex") - } - lhs = SubexASTCopyScalar {scalar} + lhs = SubexASTCopyAnySimpleValue{} + case RuneType: + outType = inType + lhs = SubexASTCopyRune{','} + default: + panic("Invalid inType") + } + case '?': + outType = inType + lhs = SubexASTCopyBool{} + case '`': + outType = inType + switch inType { + case ValueType: + lhs = parseValueReplacement(l, '`', 0) + if !accept(l, "`") { + panic("Missing closing `") } - } - loop: for { - if minPower <= 20 { - next, outType2 := parseSubex(l, 21, inType) - // TODO: next might legitimately be SubexASTEmpty, e.g. `` - if next != nil && (next != SubexASTEmpty{}) { - outType = resolveTypes(outType, outType2) - lhs = SubexASTConcat{lhs, next} - continue loop + case RuneType: + lhs = parseRuneReplacement(l, '`') + default: + panic("Invalid inType") + } + case ' ': + switch inType { + case RuneType: + outType = RuneType + lhs = SubexASTCopyRune {' '} + case ValueType: + goto start + } + default: + outType = inType + switch inType { + case RuneType: + lhs = SubexASTCopyRune {r} + // ValueType, NumberType + case ValueType: + l.Rewind() + scalar, ok := parseScalarLiteral(l) + if !ok { + panic("Invalid subex") } + lhs = SubexASTCopyScalar {scalar} } + } + loop: for { r := l.Next() switch { - case r == '{' && minPower <= 4: - lhs = SubexASTRepeat { + case r == eof: + break loop + case r == '{' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: parseRepeatRange(l), + } + case r == '+' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange {{ + Start: -1, + End: 1, + }}, + } + case r == '*' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange {{ + Start: -1, + End: 0, + }}, + } + case r == '_' && minPower <= 10: + switch inType { + case ValueType: + lhs = SubexASTDiscard { Content: lhs, - Acceptable: parseRepeatRange(l), + InnerOutType: outType, } - case r == '+' && minPower <= 4: - lhs = SubexASTSum {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - case r == '*' && minPower <= 4: - lhs = SubexASTProduct {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - // case r == '/' && minPower <= 4: - // lhs = SubexASTReciprocal {lhs} - case r == '!' && minPower <= 4: - lhs = SubexASTNot {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - // case r == '=' && minPower <= 4: - // lhs = SubexASTEqual {lhs} - case r == '$' && minPower <= 4: + outType = AnyType + case RuneType: + // Just a concat + lhs = SubexASTConcat { + lhs, + SubexASTCopyRune { + rune: '_', + }, + } + outType = AnyType + default: + panic("Invalid inType") + } + case r == '%' && minPower <= 10: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '<', '>': + panic("Invalid character after %") + case '_': + panic("Cannot load from _") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + First: SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + First: SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } + default: + panic("Invalid inType") + } + } + case r == '>' && minPower <= 10: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '>': slot := l.Next() - if slot == eof { + switch slot { + case eof: panic("Missing slot character") - } - if slot == '_' { + case '_': lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } - } else { - if inType == ValueType { - lhs = SubexASTStoreValues { + outType = AnyType + default: + switch inType { + case ValueType: + lhs = SubexASTAppendStoreValues { Match: lhs, Slot: slot, } - } else { - lhs = SubexASTStoreRunes { + case RuneType: + lhs = SubexASTAppendStoreRunes { Match: lhs, Slot: slot, } + default: + panic("Invalid inType") } + outType = AnyType + } + case '<': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + First: SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + First: SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } + default: + panic("Invalid inType") + } + outType = inType + } + case '_': + lhs = SubexASTDiscard { + Content: lhs, + InnerOutType: outType, } outType = AnyType - case r == '|' && minPower <= 8: - rhs, outType2 := parseSubex(l, 9, inType) - outType = resolveTypes(outType, outType2) - if rhs == nil { - panic("Missing subex after |") + default: + switch inType { + case ValueType: + lhs = SubexASTStoreValues { + Match: lhs, + Slot: slot, + } + case RuneType: + lhs = SubexASTStoreRunes { + Match: lhs, + Slot: slot, + } + default: + panic("Invalid type") } - lhs = SubexASTOr{lhs, rhs} - /*case r == ';' && minPower <= 10: - rhs := parseSubex(l, 11, inType, outType) - if rhs == nil { - panic("Missing subex after ;") + outType = AnyType + } + case r == '<' && minPower <= 6: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + case '>': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + SubexASTOutputValueLoad { + slot: slot, + }, + SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + SubexASTOutputRuneLoad { + slot: slot, + }, + SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + } + default: + panic("Invalid inType") + } } - lhs = SubexASTJoin { - Content: lhs, - Delimiter: rhs, - }*/ default: + // This is just a concat l.Rewind() + l.RewindRune('<') + next, outType2 := parseSubex(l, 7, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` + if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + case r == '|' && minPower <= 2: + rhs, outType2 := parseSubex(l, 3, inType) + outType = resolveTypes(outType, outType2) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + case minPower <= 6: + l.Rewind() + next, outType2 := parseSubex(l, 7, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` + if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) + lhs = SubexASTConcat{lhs, next} + } else { break loop + } + default: + l.Rewind() + break loop } } return lhs, outType |
