From 62aa738be03845f96c40edde087ea39693b27e4e Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Sun, 15 Dec 2024 17:54:45 +0000 Subject: Implement new number system --- subex/parse.go | 661 ++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 538 insertions(+), 123 deletions(-) (limited to 'subex/parse.go') diff --git a/subex/parse.go b/subex/parse.go index e91008a..179cc01 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,7 +1,6 @@ package subex import ( - "fmt" "main/walk" "strconv" "strings" @@ -64,6 +63,7 @@ const ( type RuneReader interface { Next() rune Rewind() + RewindRune(r rune) } func accept(l RuneReader, chars string) bool { @@ -122,7 +122,6 @@ func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { panic("Invalid literal") } default: - fmt.Printf("%c\n", r) panic("Invalid literal") } } @@ -145,6 +144,72 @@ func parseInt(l RuneReader) (output int) { return output } +func parseNumberFilter(l RuneReader, minPower int) SubexASTNumberFilter { + var lhs SubexASTNumberFilter + r := l.Next() + switch r { + case eof: + panic("Missing matching ]") + case 'c': + count := parseInt(l) + lhs = SubexASTNumberFilterCount {count} + case 'p': + var subset NumberSubset + if l.Next() == 'i' { + subset = NumberSubsetPositiveInteger + } else { + subset = NumberSubsetPositiveReal + l.Rewind() + } + lhs = SubexASTNumberFilterSubset { + subset: subset, + } + default: + if !isNumericRune(r) { + panic("Invalid character in numeric []") + } + + var builder strings.Builder + builder.WriteRune(r) + for { + r := l.Next() + if !isNumericRune(r) { + l.Rewind() + break + } + builder.WriteRune(r) + } + numberString := builder.String() + number, err := strconv.ParseFloat(numberString, 64) + if err != nil { + panic("Invalid number literal") + } + + lhs = SubexASTNumberFilterLiteral {number} + } + + loop: for { + r := l.Next() + switch { + case r == '+' && minPower <= 10: + lhs = SubexASTNumberFilterAdd { + lhs: lhs, + rhs: parseNumberFilter(l, 11), + } + case r == '*' && minPower <= 20: + lhs = SubexASTNumberFilterMultiply { + lhs: lhs, + rhs: parseNumberFilter(l, 21), + } + default: + l.Rewind() + break loop + } + } + + return lhs +} + // Having just read {, read in and parse the range contents func parseRepeatRange(l RuneReader) (output []ConvexRange) { loop: for { @@ -189,7 +254,7 @@ func parseRepeatRange(l RuneReader) (output []ConvexRange) { return output } -func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { +func parseValueReplacementOLD(l RuneReader, end rune) (output SubexAST) { output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings @@ -222,7 +287,7 @@ func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { Second: SubexASTDestructure { Destructure: NoneStructure, Structure: MapStructure, - Content: parseValueReplacement(l, ')'), + Content: parseValueReplacementOLD(l, ')'), }, } if !accept(l, "#") { @@ -264,7 +329,7 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { panic("Missing closing `") case end: break loop - case '$': + case '<': slot := l.Next() if slot == eof { panic("Missing slot character") @@ -287,6 +352,126 @@ func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { return output } +func parseValueReplacement(l RuneReader, end rune, minPower int) SubexAST { + // TODO: escaping probably + var lhs SubexAST + r := l.Next() + switch r { + case eof: + panic("Missing closing `") + case end: + l.Rewind() + return SubexASTEmpty{} + case 'n': + if !accept(l, "u") { + panic("Expected null") + } + if !accept(l, "l") { + panic("Expected null") + } + if !accept(l, "l") { + panic("Expected null") + } + lhs = SubexASTOutputValueLiteral { + literal: walk.NullValue{}, + } + // TODO: everything except numbers, strings, maps, and null + case '"': + lhs = SubexASTDestructure { + Destructure: NoneStructure, + Structure: StringStructure, + Content: parseRuneReplacement(l, '"'), + } + case '#': + if !accept(l, "(") { + panic("Missing ( after #") + } + lhs = SubexASTDestructure { + Destructure: NoneStructure, + Structure: MapStructure, + Content: parseValueReplacement(l, ')', 0), + } + if !accept(l, ")") { + panic("Missing closing )") + } + if !accept(l, "#") { + panic("Missing # after )") + } + case '<': + slot := l.Next() + if slot == eof { + panic("Missing slot character") + } + lhs = SubexASTOutputValueLoad { + slot: slot, + } + default: + if !isNumericRune(r) { + panic("Invalid character in numeric") + } + + var builder strings.Builder + builder.WriteRune(r) + for { + r := l.Next() + if !isNumericRune(r) { + l.Rewind() + break + } + builder.WriteRune(r) + } + numberString := builder.String() + number, err := strconv.ParseFloat(numberString, 64) + if err != nil { + panic("Invalid number literal") + } + + lhs = SubexASTOutputValueLiteral { + literal: walk.NumberValue(number), + } + } + + loop: for { + r := l.Next() + switch { + case r == eof: + panic("Missing closing `") + case r == '+' && minPower <= 10: + lhs = SubexASTBinop { + op: binopAdd, + lhs: lhs, + rhs: parseValueReplacement(l, end, 11), + } + case r == '*' && minPower <= 20: + lhs = SubexASTBinop { + op: binopMultiply, + lhs: lhs, + rhs: parseValueReplacement(l, end, 21), + } + case r == '/' && minPower <= 20: + lhs = SubexASTBinop { + op: binopDivide, + lhs: lhs, + rhs: parseValueReplacement(l, end, 21), + } + case r == end: + l.Rewind() + break loop + case minPower <= 2: + l.Rewind() + lhs = SubexASTConcat { + First: lhs, + Second: parseValueReplacement(l, end, 3), + } + default: + l.Rewind() + break loop + } + } + + return lhs +} + // Parse the contents of a range subex [] into a map // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { // // TODO escaping @@ -471,165 +656,395 @@ func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType start: r := l.Next() switch r { - case eof: - return nil, inType - case '(': - lhs, outType = parseSubex(l, 0, inType) - if !accept(l, ")") { - panic("Missing matching )") - } - case '-': - lhs, outType = parseDestructure(l, NoneStructure, inType) - case '~': - lhs, outType = parseDestructure(l, StringStructure, inType) - case '@': - lhs, outType = parseDestructure(l, ArrayStructure, inType) - case ':': - lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) - case '#': - lhs, outType = parseDestructure(l, MapStructure, inType) - case '"': - if inType == ValueType { - var innerOutType Type - lhs, innerOutType = parseSubex(l, 0, RuneType) - if !accept(l, "\"") { - panic("Missing matching \"") - } - resolveTypes(innerOutType, RuneType) - lhs = SubexASTDestructure { - Destructure: StringStructure, - Structure: StringStructure, - Content: lhs, - } - outType = ValueType - } else { - l.Rewind() - return SubexASTEmpty{}, inType + case eof: + return nil, inType + case '(': + lhs, outType = parseSubex(l, 0, inType) + if !accept(l, ")") { + panic("Missing matching )") + } + case '-': + lhs, outType = parseDestructure(l, NoneStructure, inType) + case '~': + lhs, outType = parseDestructure(l, StringStructure, inType) + case '@': + lhs, outType = parseDestructure(l, ArrayStructure, inType) + case ':': + lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) + case '#': + lhs, outType = parseDestructure(l, MapStructure, inType) + case '"': + switch inType { + case ValueType: + var innerOutType Type + lhs, innerOutType = parseSubex(l, 0, RuneType) + if !accept(l, "\"") { + panic("Missing matching \"") + } + resolveTypes(innerOutType, RuneType) + lhs = SubexASTDestructure { + Destructure: StringStructure, + Structure: StringStructure, + Content: lhs, } - // TODO - // case '[': - // rangeParts := parseRangeSubex(l) - // lhs = SubexASTRange {rangeParts} - case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': + outType = ValueType + // RuneType + default: l.Rewind() return SubexASTEmpty{}, inType - case '.': - outType = inType - if inType == RuneType { - lhs = SubexASTCopyAnyRune{} - } else { - lhs = SubexASTCopyAnyValue{} - } - case ',': + } + case '<': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot") + case '>': + panic("Parsing error. Tried to parse <> as a subex with nothing before it") + default: switch inType { case ValueType: - outType = inType - lhs = SubexASTCopyAnySimpleValue{} + lhs = SubexASTOutputValueLoad { + slot: slot, + } case RuneType: - outType = inType - lhs = SubexASTCopyRune{','} + lhs = SubexASTOutputRuneLoad { + slot: slot, + } default: panic("Invalid inType") } - case '?': + } + case '[': + switch inType { + case ValueType: + lhs = SubexASTCopyNumberFilter { + filter: parseNumberFilter(l, 0), + } + if !accept(l, "]") { + panic("Missing matching ]") + } + default: + // TODO: other types + panic("[] is only valid for values currently") + } + case ')', ']', '|', '{', '+', '*': + l.Rewind() + return SubexASTEmpty{}, inType + case '.': + outType = inType + if inType == RuneType { + lhs = SubexASTCopyAnyRune{} + } else { + lhs = SubexASTCopyAnyValue{} + } + case ',': + switch inType { + case ValueType: outType = inType - lhs = SubexASTCopyBool{} - case '%': + lhs = SubexASTCopyAnySimpleValue{} + case RuneType: outType = inType - lhs = SubexASTCopyNumber{} - case '`': + lhs = SubexASTCopyRune{','} + default: + panic("Invalid inType") + } + case 'r': + switch inType { + case ValueType: outType = inType - switch inType { - case ValueType: - lhs = parseValueReplacement(l, '`') - case RuneType: - lhs = parseRuneReplacement(l, '`') - default: - panic("Invalid inType") + lhs = SubexASTCopyNumberFilter { + filter: SubexASTNumberFilterSubset { + subset: NumberSubsetReal, + }, } - case ' ': - if inType == RuneType { - outType = RuneType - lhs = SubexASTCopyRune {' '} - } else { - goto start + case RuneType: + outType = inType + lhs = SubexASTCopyRune {'r'} + default: + panic("Invalid inType") + } + case '?': + outType = inType + lhs = SubexASTCopyBool{} + case '`': + outType = inType + switch inType { + case ValueType: + lhs = parseValueReplacement(l, '`', 0) + if !accept(l, "`") { + panic("Missing closing `") } + case RuneType: + lhs = parseRuneReplacement(l, '`') default: - outType = inType - if inType == RuneType { - lhs = SubexASTCopyRune {r} - } else { - l.Rewind() - scalar, ok := parseScalarLiteral(l) - if !ok { - panic("Invalid subex") - } - lhs = SubexASTCopyScalar {scalar} + panic("Invalid inType") + } + case ' ': + switch inType { + case RuneType: + outType = RuneType + lhs = SubexASTCopyRune {' '} + case ValueType: + goto start + } + default: + outType = inType + switch inType { + case RuneType: + lhs = SubexASTCopyRune {r} + // ValueType, NumberType + case ValueType: + l.Rewind() + scalar, ok := parseScalarLiteral(l) + if !ok { + panic("Invalid subex") } + lhs = SubexASTCopyScalar {scalar} + } } loop: for { - if minPower <= 20 { - next, outType2 := parseSubex(l, 21, inType) - // TODO: next might legitimately be SubexASTEmpty, e.g. `` - if next != nil && (next != SubexASTEmpty{}) { - outType = resolveTypes(outType, outType2) - lhs = SubexASTConcat{lhs, next} - continue loop - } - } r := l.Next() switch { - case r == '{' && minPower <= 4: - lhs = SubexASTRepeat { + case r == eof: + break loop + case r == '{' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: parseRepeatRange(l), + } + case r == '+' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange {{ + Start: -1, + End: 1, + }}, + } + case r == '*' && minPower <= 10: + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange {{ + Start: -1, + End: 0, + }}, + } + case r == '_' && minPower <= 10: + switch inType { + case ValueType: + lhs = SubexASTDiscard { Content: lhs, - Acceptable: parseRepeatRange(l), + InnerOutType: outType, } - case r == '+' && minPower <= 4: - lhs = SubexASTSum {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - case r == '*' && minPower <= 4: - lhs = SubexASTProduct {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - case r == '!' && minPower <= 4: - lhs = SubexASTNot {lhs} - resolveTypes(inType, ValueType) - outType = resolveTypes(outType, ValueType) - case r == '$' && minPower <= 4: + outType = AnyType + case RuneType: + // Just a concat + lhs = SubexASTConcat { + lhs, + SubexASTCopyRune { + rune: '_', + }, + } + outType = AnyType + default: + panic("Invalid inType") + } + case r == '%' && minPower <= 10: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '<', '>': + panic("Invalid character after %") + case '_': + panic("Cannot load from _") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + First: SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + First: SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } + default: + panic("Invalid inType") + } + } + case r == '>' && minPower <= 10: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '>': slot := l.Next() - if slot == eof { + switch slot { + case eof: panic("Missing slot character") - } - if slot == '_' { + case '_': lhs = SubexASTDiscard { Content: lhs, InnerOutType: outType, } - } else { - if inType == ValueType { - lhs = SubexASTStoreValues { + outType = AnyType + default: + switch inType { + case ValueType: + lhs = SubexASTAppendStoreValues { Match: lhs, Slot: slot, } - } else { - lhs = SubexASTStoreRunes { + case RuneType: + lhs = SubexASTAppendStoreRunes { Match: lhs, Slot: slot, } + default: + panic("Invalid inType") } + outType = AnyType + } + case '<': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + First: SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + First: SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } + default: + panic("Invalid inType") + } + outType = inType + } + case '_': + lhs = SubexASTDiscard { + Content: lhs, + InnerOutType: outType, } outType = AnyType - case r == '|' && minPower <= 8: - rhs, outType2 := parseSubex(l, 9, inType) - outType = resolveTypes(outType, outType2) - if rhs == nil { - panic("Missing subex after |") + default: + switch inType { + case ValueType: + lhs = SubexASTStoreValues { + Match: lhs, + Slot: slot, + } + case RuneType: + lhs = SubexASTStoreRunes { + Match: lhs, + Slot: slot, + } + default: + panic("Invalid type") + } + outType = AnyType + } + case r == '<' && minPower <= 6: + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + case '>': + slot := l.Next() + switch slot { + case eof: + panic("Missing slot character") + case '_': + panic("Cannot load from _ slot") + default: + switch inType { + case ValueType: + lhs = SubexASTConcat { + SubexASTOutputValueLoad { + slot: slot, + }, + SubexASTStoreValues { + Match: lhs, + Slot: slot, + }, + } + case RuneType: + lhs = SubexASTConcat { + SubexASTOutputRuneLoad { + slot: slot, + }, + SubexASTStoreRunes { + Match: lhs, + Slot: slot, + }, + } + default: + panic("Invalid inType") + } } - lhs = SubexASTOr{lhs, rhs} default: + // This is just a concat l.Rewind() + l.RewindRune('<') + next, outType2 := parseSubex(l, 7, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` + if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + case r == '|' && minPower <= 2: + rhs, outType2 := parseSubex(l, 3, inType) + outType = resolveTypes(outType, outType2) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + case minPower <= 6: + l.Rewind() + next, outType2 := parseSubex(l, 7, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` + if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) + lhs = SubexASTConcat{lhs, next} + } else { break loop + } + default: + l.Rewind() + break loop } } return lhs, outType -- cgit v1.2.3