diff options
Diffstat (limited to 'subex/parse.go')
-rw-r--r-- | subex/parse.go | 454 |
1 files changed, 343 insertions, 111 deletions
diff --git a/subex/parse.go b/subex/parse.go index 35baaa2..e91008a 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,11 +1,66 @@ package subex import ( + "fmt" "main/walk" "strconv" "strings" ) +type Type int +const ( + AnyType Type = iota + ValueType + RuneType +) + +func resolveTypes(t1 Type, t2 Type) Type { + if t1 == AnyType { + return t2 + } + + if t2 == AnyType { + return t1 + } + + if t1 == t2 { + return t1 + } + + panic("Types don't match in parser") +} + +type Structure int +const ( + NoneStructure Structure = iota + StringStructure + ArrayStructure + ArrayValuesStructure + MapStructure +) +func (s Structure) String() string { + switch s { + case NoneStructure: + return "-" + case StringStructure: + return "~" + case ArrayStructure: + return "@" + case ArrayValuesStructure: + return ":" + case MapStructure: + return "#" + default: + panic("Invalid structure") + } +} + +type DestructureMethod int +const ( + Normal DestructureMethod = iota + Iterate +) + type RuneReader interface { Next() rune Rewind() @@ -45,28 +100,29 @@ func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { if err != nil { panic("Invalid number literal") } - return walk.NumberScalar(number), true + return walk.NumberValue(number), true } switch r { case 'n': if accept(l, "u") && accept(l, "l") && accept(l, "l") { - return walk.NullScalar{}, true + return walk.NullValue{}, true } else { panic("Invalid literal") } case 't': if accept(l, "r") && accept(l, "u") && accept(l, "e") { - return walk.BoolScalar(true), true + return walk.BoolValue(true), true } else { panic("Invalid literal") } case 'f': if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") { - return walk.BoolScalar(false), true + return walk.BoolValue(false), true } else { panic("Invalid literal") } default: + fmt.Printf("%c\n", r) panic("Invalid literal") } } @@ -133,34 +189,99 @@ func parseRepeatRange(l RuneReader) (output []ConvexRange) { return output } -// TODO: Consider if it's worth making better use of the go type system to enforce output being all runes or all values -func parseReplacement(l RuneReader, runic bool) (output []OutputContentAST) { +func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { + output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings loop: for { r := l.Next() switch r { - case eof: - panic("Missing closing `") - case '`': - break loop - case '$': - slot := l.Next() - if slot == eof { - panic("Missing slot character") - } - output = append(output, OutputLoadAST{slot: slot}) - default: - if runic { - output = append(output, OutputRuneLiteralAST {walk.StringRuneAtom(r)}) - } else { - l.Rewind() - scalar, ok := parseScalarLiteral(l) - if !ok { - panic("Invalid scalar literal") - } - output = append(output, OutputValueLiteralAST {scalar}) - } + case eof: + panic("Missing closing `") + case ' ': + case end: + break loop + case '$': + slot := l.Next() + if slot == eof { + panic("Missing slot character") + } + output = SubexASTConcat { + First: output, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + // TODO: destructures + case '#': + if !accept(l, "(") { + panic("Missing ( after #") + } + output = SubexASTConcat { + First: output, + Second: SubexASTDestructure { + Destructure: NoneStructure, + Structure: MapStructure, + Content: parseValueReplacement(l, ')'), + }, + } + if !accept(l, "#") { + panic("Missing # after )") + } + case '"': + output = SubexASTConcat { + First: output, + Second: SubexASTDestructure { + Destructure: NoneStructure, + Structure: StringStructure, + Content: parseRuneReplacement(l, '"'), + }, + } + default: + l.Rewind() + scalar, ok := parseScalarLiteral(l) + if !ok { + panic("Invalid scalar literal") + } + output = SubexASTConcat { + First: output, + Second: SubexASTOutputValueLiteral { + literal: scalar, + }, + } + } + } + return output +} + +func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { + output = SubexASTEmpty{} + // TODO escaping + loop: for { + r := l.Next() + switch r { + case eof: + panic("Missing closing `") + case end: + break loop + case '$': + slot := l.Next() + if slot == eof { + panic("Missing slot character") + } + output = SubexASTConcat { + First: output, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } + default: + output = SubexASTConcat { + First: output, + Second: SubexASTOutputRuneLiteral { + literal: r, + }, + } } } return output @@ -245,92 +366,197 @@ func parseReplacement(l RuneReader, runic bool) (output []OutputContentAST) { // return parts // } -func parseSubex(l RuneReader, minPower int, runic bool) SubexAST { - var lhs SubexAST +func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { + var method rune + switch l.Next() { + case '(': + method = ')' + case '[': + method = ']' + default: + panic("Missing ( or [ after destructure start") + } + + var innerInType Type + var expectedInType Type + switch destructure { + case NoneStructure: + innerInType = inType + expectedInType = inType + case StringStructure: + innerInType = RuneType + expectedInType = ValueType + case ArrayStructure: + innerInType = ValueType + expectedInType = ValueType + case ArrayValuesStructure: + innerInType = ValueType + expectedInType = ValueType + case MapStructure: + innerInType = ValueType + expectedInType = ValueType + default: + panic("Invalid structure") + } + + resolveTypes(inType, expectedInType) + + lhs, innerOutType := parseSubex(l, 0, innerInType) + if !accept(l, string(method)) { + panic("Missing matching ) or ]") + } + + switch method { + case ')': + case ']': + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange{{ + Start: -1, + End: 0, + }}, + } + default: + panic("Invalid method") + } + + var structure Structure + var expectedInnerOutType Type + r := l.Next() + switch r { + case '-': + structure = NoneStructure + expectedInnerOutType = innerOutType + case '~': + structure = StringStructure + expectedInnerOutType = RuneType + case '@': + structure = ArrayStructure + expectedInnerOutType = ValueType + case ':': + structure = ArrayValuesStructure + expectedInnerOutType = ValueType + case '#': + structure = MapStructure + expectedInnerOutType = ValueType + default: + panic("Missing matching destructure") + } + + innerOutType = resolveTypes(innerOutType, expectedInnerOutType) + + switch structure { + case NoneStructure: + outType = innerOutType + case StringStructure: + outType = ValueType + case ArrayStructure: + outType = ValueType + case ArrayValuesStructure: + outType = ValueType + case MapStructure: + outType = ValueType + } + + lhs = SubexASTDestructure { + Destructure: destructure, + Structure: structure, + Content: lhs, + } + + return lhs, outType +} + +func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) { + start: r := l.Next() switch r { case eof: - return nil + return nil, inType case '(': - lhs = parseSubex(l, 0, runic) + lhs, outType = parseSubex(l, 0, inType) if !accept(l, ")") { panic("Missing matching )") } + case '-': + lhs, outType = parseDestructure(l, NoneStructure, inType) + case '~': + lhs, outType = parseDestructure(l, StringStructure, inType) + case '@': + lhs, outType = parseDestructure(l, ArrayStructure, inType) + case ':': + lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) + case '#': + lhs, outType = parseDestructure(l, MapStructure, inType) + case '"': + if inType == ValueType { + var innerOutType Type + lhs, innerOutType = parseSubex(l, 0, RuneType) + if !accept(l, "\"") { + panic("Missing matching \"") + } + resolveTypes(innerOutType, RuneType) + lhs = SubexASTDestructure { + Destructure: StringStructure, + Structure: StringStructure, + Content: lhs, + } + outType = ValueType + } else { + l.Rewind() + return SubexASTEmpty{}, inType + } // TODO // case '[': // rangeParts := parseRangeSubex(l) // lhs = SubexASTRange {rangeParts} - case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$': + case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': l.Rewind() - return SubexASTEmpty{} - // case '=': - // replacement := parseReplacement(l) - // lhs = SubexASTOutput{replacement} - // case '^': - // replacement := parseReplacement(l) - // replacement = append( - // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, - // replacement... - // ) - // replacement = append( - // replacement, - // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, - // ) - // lhs = SubexASTOutput {replacement} + return SubexASTEmpty{}, inType case '.': - if runic { + outType = inType + if inType == RuneType { lhs = SubexASTCopyAnyRune{} } else { lhs = SubexASTCopyAnyValue{} } + case ',': + switch inType { + case ValueType: + outType = inType + lhs = SubexASTCopyAnySimpleValue{} + case RuneType: + outType = inType + lhs = SubexASTCopyRune{','} + default: + panic("Invalid inType") + } case '?': + outType = inType lhs = SubexASTCopyBool{} case '%': + outType = inType lhs = SubexASTCopyNumber{} - case ':': - if runic { - lhs = SubexASTCopyRune {':'} - } else { - if !accept(l, "[") { - panic("Missing [ after :") - } - lhs = SubexASTEnterArray {parseSubex(l, 0, runic)} - if !accept(l, "]") { - panic("Missing matching ]") - } - } case '`': - lhs = SubexASTOutput {parseReplacement(l, runic)} - case '~': - if runic { - lhs = SubexASTCopyRune {'~'} + outType = inType + switch inType { + case ValueType: + lhs = parseValueReplacement(l, '`') + case RuneType: + lhs = parseRuneReplacement(l, '`') + default: + panic("Invalid inType") + } + case ' ': + if inType == RuneType { + outType = RuneType + lhs = SubexASTCopyRune {' '} } else { - if !accept(l, "\"") { - panic("Missing \" after ~") - } - lhs = SubexASTEnterString {parseSubex(l, 0, true)} - if !accept(l, "\"") { - panic("Missing matching \"") - } + goto start } - // TODO - // case '_': - // lhs = SubexASTCopyStringAtom{} - // case '#': - // lhs = SubexASTCopyString{} - // case ',': - // lhs = SubexASTCopyValue{} - // case '"': - // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()} - // case '~': - // literals := parseNonStringLiteral(l) - // var replacement []OutputContentAST - // for _, literal := range literals { - // replacement = append(replacement, OutputValueLiteralAST {literal}) - // } - // lhs = SubexASTOutput {replacement} default: - if runic { + outType = inType + if inType == RuneType { lhs = SubexASTCopyRune {r} } else { l.Rewind() @@ -343,8 +569,10 @@ func parseSubex(l RuneReader, minPower int, runic bool) SubexAST { } loop: for { if minPower <= 20 { - next := parseSubex(l, 21, runic) + next, outType2 := parseSubex(l, 21, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} continue loop } @@ -358,54 +586,58 @@ func parseSubex(l RuneReader, minPower int, runic bool) SubexAST { } case r == '+' && minPower <= 4: lhs = SubexASTSum {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '*' && minPower <= 4: lhs = SubexASTProduct {lhs} - case r == '-' && minPower <= 4: - lhs = SubexASTNegate {lhs} - case r == '/' && minPower <= 4: - lhs = SubexASTReciprocal {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '!' && minPower <= 4: lhs = SubexASTNot {lhs} - case r == '=' && minPower <= 4: - lhs = SubexASTEqual {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '$' && minPower <= 4: slot := l.Next() if slot == eof { panic("Missing slot character") } if slot == '_' { - lhs = SubexASTDiscard {lhs} + lhs = SubexASTDiscard { + Content: lhs, + InnerOutType: outType, + } } else { - lhs = SubexASTStore{ - Match: lhs, - Slot: slot, + if inType == ValueType { + lhs = SubexASTStoreValues { + Match: lhs, + Slot: slot, + } + } else { + lhs = SubexASTStoreRunes { + Match: lhs, + Slot: slot, + } } } + outType = AnyType case r == '|' && minPower <= 8: - rhs := parseSubex(l, 9, runic) + rhs, outType2 := parseSubex(l, 9, inType) + outType = resolveTypes(outType, outType2) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} - case r == ';' && minPower <= 10: - rhs := parseSubex(l, 11, runic) - if rhs == nil { - panic("Missing subex after ;") - } - lhs = SubexASTJoin{ - Content: lhs, - Delimiter: rhs, - } default: l.Rewind() break loop } } - return lhs + return lhs, outType } func Parse(l RuneReader) SubexAST { - ast := parseSubex(l, 0, false) + ast, outType := parseSubex(l, 0, ValueType) + outType = resolveTypes(outType, ValueType) if ast == nil { return SubexASTEmpty{} } |