diff options
Diffstat (limited to 'subex')
-rw-r--r-- | subex/filter.go | 26 | ||||
-rw-r--r-- | subex/main.go | 66 | ||||
-rw-r--r-- | subex/main_test.go | 238 | ||||
-rw-r--r-- | subex/parse.go | 388 | ||||
-rw-r--r-- | subex/subexast.go | 343 | ||||
-rw-r--r-- | subex/subexstate.go | 184 |
6 files changed, 914 insertions, 331 deletions
diff --git a/subex/filter.go b/subex/filter.go index dce0f0e..309d6c7 100644 --- a/subex/filter.go +++ b/subex/filter.go @@ -27,6 +27,26 @@ func (_ anyBoolFilter) valueFilter(value walk.Value) bool { return isBool } +type simpleValueFilter struct {} +func (_ simpleValueFilter) valueFilter(value walk.Value) bool { + switch value := value.(type) { + case walk.NullValue: + return true + case walk.BoolValue: + return true + case walk.NumberValue: + return true + case walk.StringValue: + return true + case walk.ArrayValue: + return len(value) == 0 + case walk.MapValue: + return len(value) == 0 + default: + panic("Invalid value type") + } +} + type anyValueFilter struct {} func (_ anyValueFilter) valueFilter(value walk.Value) bool { return true @@ -38,6 +58,12 @@ func (_ anyArrayFilter) valueFilter(value walk.Value) bool { return isArray } +type anyMapFilter struct {} +func (_ anyMapFilter) valueFilter(value walk.Value) bool { + _, isMap := value.(walk.MapValue) + return isMap +} + type anyStringFilter struct {} func (_ anyStringFilter) valueFilter(value walk.Value) bool { _, isString := value.(walk.StringValue) diff --git a/subex/main.go b/subex/main.go index 86a8d41..32a5cf3 100644 --- a/subex/main.go +++ b/subex/main.go @@ -150,7 +150,7 @@ type auxiliaryState struct { outputStack OutputStack // How deeply nested the current execution is inside of the overall value // i.e. starts at zero, is incremented to one when entering an array - nesting int + nesting []bool } func (aux auxiliaryState) cloneStore() auxiliaryState { @@ -204,16 +204,6 @@ func (aux auxiliaryState) topAppendRune(runes []rune) auxiliaryState { return aux } -func (aux auxiliaryState) incNest() auxiliaryState { - aux.nesting++ - return aux -} - -func (aux auxiliaryState) decNest() auxiliaryState { - aux.nesting-- - return aux -} - type SubexBranch struct { state SubexState aux auxiliaryState @@ -236,8 +226,15 @@ func (pair SubexEatBranch) accepting() []OutputStack { } func equalStates(left SubexEatBranch, right SubexEatBranch) bool { - // Only care about if they are the same pointer - return left.state == right.state && left.aux.nesting == right.aux.nesting + if left.state != right.state || len(left.aux.nesting) != len(right.aux.nesting) { + return false + } + for i, l := range left.aux.nesting { + if l != right.aux.nesting[i] { + return false + } + } + return true } // If two branches have the same state, only the first has a chance of being successful @@ -257,11 +254,11 @@ outer: return states[:uniqueStates] } -func addStates(curStates []SubexEatBranch, newStates []SubexBranch) []SubexEatBranch { +func addStates(curStates []SubexEatBranch, newStates []SubexBranch, nesting []bool) []SubexEatBranch { for _, state := range newStates { switch s := state.state.(type) { case SubexEpsilonState: - curStates = addStates(curStates, s.epsilon(state.aux)) + curStates = addStates(curStates, s.epsilon(state.aux), nesting) case SubexEatState: curStates = append(curStates, SubexEatBranch{ state: s, @@ -272,14 +269,19 @@ func addStates(curStates []SubexEatBranch, newStates []SubexBranch) []SubexEatBr return curStates } -func processInput(states []SubexEatBranch, input walk.Edible, nesting int) []SubexEatBranch { +func processInput(states []SubexEatBranch, input walk.Edible, nesting []bool) []SubexEatBranch { newStates := make([]SubexEatBranch, 0, 2) for _, state := range states { - // TODO: What if nesting is changed by an epsilon state? - if state.aux.nesting == nesting { - newStates = addStates(newStates, state.eat(input)) - } else if state.aux.nesting < nesting { + if len(state.aux.nesting) > len(nesting) { + continue + } + + if (len(state.aux.nesting) == len(nesting) && + (len(state.aux.nesting) == 0 || len(nesting) == 0 || + state.aux.nesting[len(nesting) - 1] || nesting[len(nesting) - 1])) { + newStates = addStates(newStates, state.eat(input), nesting) + } else { newStates = append(newStates, state) } } @@ -287,21 +289,21 @@ func processInput(states []SubexEatBranch, input walk.Edible, nesting int) []Sub switch input := input.(type) { case walk.StringValue: for _, r := range input { - newStates = processInput(newStates, walk.RuneEdible(r), nesting+1) + newStates = processInput(newStates, walk.RuneEdible(r), append(nesting, true)) } - newStates = processInput(newStates, walk.StringEnd, nesting+1) + newStates = processInput(newStates, walk.StringEnd, append(nesting, true)) case walk.ArrayValue: for _, el := range input { - newStates = processInput(newStates, walk.NumberValue(el.Index), nesting+1) - newStates = processInput(newStates, el.Value, nesting+1) + newStates = processInput(newStates, walk.NumberValue(el.Index), append(nesting, false)) + newStates = processInput(newStates, el.Value, append(nesting, true)) } - newStates = processInput(newStates, walk.ArrayEnd, nesting+1) + newStates = processInput(newStates, walk.ArrayEnd, append(nesting, true)) case walk.MapValue: for _, el := range input { - newStates = processInput(newStates, walk.StringValue(el.Key), nesting+1) - newStates = processInput(newStates, el.Value, nesting+1) + newStates = processInput(newStates, walk.StringValue(el.Key), append(nesting, false)) + newStates = processInput(newStates, el.Value, append(nesting, true)) } - newStates = processInput(newStates, walk.MapEnd, nesting+1) + newStates = processInput(newStates, walk.MapEnd, append(nesting, true)) } newStates = pruneStates(newStates) @@ -322,20 +324,20 @@ func RunTransducer(transducer Transducer, input []walk.Value) (output []walk.Val values: make([][]walk.Value, transducer.storeSize.values), runes: make([][]rune, transducer.storeSize.runes), }, - nesting: 0, + nesting: nil, }, - }}) + }}, nil) for _, value := range input { if len(states) == 0 { break } - states = processInput(states, value, 0) + states = processInput(states, value, nil) } for _, state := range states { - if state.aux.nesting > 0 { + if len(state.aux.nesting) > 0 { continue } acceptingStacks := state.accepting() diff --git a/subex/main_test.go b/subex/main_test.go index 78a62c4..fb6f152 100644 --- a/subex/main_test.go +++ b/subex/main_test.go @@ -61,6 +61,15 @@ func TestSubexMain(t *testing.T) { }, }, { + subex: `~(.)~`, + input: []walk.Value { + walk.StringValue("a"), + }, + expected: []walk.Value { + walk.StringValue("a"), + }, + }, + { subex: `~(.$_(.{-0}))~`, input: []walk.Value { walk.StringValue("hello"), @@ -70,6 +79,21 @@ func TestSubexMain(t *testing.T) { }, }, { + subex: `#(".".{-0})-`, + input: []walk.Value { + walk.MapValue { + { + Key: "a", + Value: walk.NullValue{}, + }, + }, + }, + expected: []walk.Value { + walk.StringValue("a"), + walk.NullValue{}, + }, + }, + { subex: "@(..$a`$a$a`{-0})@", input: []walk.Value { walk.ArrayValue { @@ -182,9 +206,221 @@ func TestSubexMain(t *testing.T) { }, }, }, + { + subex: "-(`0`.)@", + input: []walk.Value { + walk.NumberValue(4), + }, + expected: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.NumberValue(4), + }, + }, + }, + }, + { + subex: `@(.$_~(.{-0})-{-0})~`, + input: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.StringValue("ab"), + }, + { + Index: 1, + Value: walk.StringValue("cd"), + }, + { + Index: 2, + Value: walk.StringValue("efg"), + }, + { + Index: 3, + Value: walk.StringValue(""), + }, + { + Index: 4, + Value: walk.StringValue("hijklm"), + }, + }, + }, + expected: []walk.Value { + walk.StringValue("abcdefghijklm"), + }, + }, + { + subex: ":(.)-", + input: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.NullValue{}, + }, + }, + }, + expected: []walk.Value { + walk.NullValue{}, + }, + }, + { + subex: ":(.{-0}+)-", + input: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.NumberValue(4), + }, + { + Index: 1, + Value: walk.NumberValue(-123), + }, + { + Index: 2, + Value: walk.NumberValue(124), + }, + }, + }, + expected: []walk.Value { + walk.NumberValue(5), + }, + }, + { + subex: "~(-(.)~{-0}):", + input: []walk.Value { + walk.StringValue("abc"), + }, + expected: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.StringValue("a"), + }, + { + Index: 0, + Value: walk.StringValue("b"), + }, + { + Index: 0, + Value: walk.StringValue("c"), + }, + }, + }, + }, + { + subex: "#(.(.$_){-0}):", + input: []walk.Value { + walk.MapValue { + { + Key: "a", + Value: walk.NullValue{}, + }, + { + Key: "b", + Value: walk.NumberValue(4), + }, + { + Key: "c", + Value: walk.StringValue("hello"), + }, + }, + }, + expected: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.StringValue("a"), + }, + { + Index: 0, + Value: walk.StringValue("b"), + }, + { + Index: 0, + Value: walk.StringValue("c"), + }, + }, + }, + }, + { + subex: ":(.`null`{-0})#", + input: []walk.Value { + walk.ArrayValue { + { + Index: 0, + Value: walk.StringValue("a"), + }, + { + Index: 1, + Value: walk.StringValue("b"), + }, + { + Index: 2, + Value: walk.StringValue("c"), + }, + }, + }, + expected: []walk.Value { + walk.MapValue { + { + Key: "a", + Value: walk.NullValue{}, + }, + { + Key: "b", + Value: walk.NullValue{}, + }, + { + Key: "c", + Value: walk.NullValue{}, + }, + }, + }, + }, + { + subex: `#(".$_(.{-0})".{-0})#`, + input: []walk.Value { + walk.MapValue { + { + Key: "hello", + Value: walk.NullValue{}, + }, + { + Key: "world", + Value: walk.NullValue{}, + }, + }, + }, + expected: []walk.Value { + walk.MapValue { + { + Key: "ello", + Value: walk.NullValue{}, + }, + { + Key: "orld", + Value: walk.NullValue{}, + }, + }, + }, + }, + { + subex: ".{-0}`\"hello\"`", + input: []walk.Value { + walk.NumberValue(1), + walk.NumberValue(2), + }, + expected: []walk.Value { + walk.NumberValue(1), + walk.NumberValue(2), + walk.StringValue("hello"), + }, + }, } - for _, test := range tests { + for i, test := range tests { + t.Logf("Running test: %d", i) lexer := NewStringRuneReader(test.subex) ast := Parse(lexer) transducer := CompileTransducer(ast) diff --git a/subex/parse.go b/subex/parse.go index 9602a4b..e91008a 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,6 +1,7 @@ package subex import ( + "fmt" "main/walk" "strconv" "strings" @@ -8,10 +9,58 @@ import ( type Type int const ( - ValueType Type = iota + AnyType Type = iota + ValueType RuneType ) +func resolveTypes(t1 Type, t2 Type) Type { + if t1 == AnyType { + return t2 + } + + if t2 == AnyType { + return t1 + } + + if t1 == t2 { + return t1 + } + + panic("Types don't match in parser") +} + +type Structure int +const ( + NoneStructure Structure = iota + StringStructure + ArrayStructure + ArrayValuesStructure + MapStructure +) +func (s Structure) String() string { + switch s { + case NoneStructure: + return "-" + case StringStructure: + return "~" + case ArrayStructure: + return "@" + case ArrayValuesStructure: + return ":" + case MapStructure: + return "#" + default: + panic("Invalid structure") + } +} + +type DestructureMethod int +const ( + Normal DestructureMethod = iota + Iterate +) + type RuneReader interface { Next() rune Rewind() @@ -73,6 +122,7 @@ func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) { panic("Invalid literal") } default: + fmt.Printf("%c\n", r) panic("Invalid literal") } } @@ -139,7 +189,8 @@ func parseRepeatRange(l RuneReader) (output []ConvexRange) { return output } -func parseValueReplacement(l RuneReader) (output []OutputValueAST) { +func parseValueReplacement(l RuneReader, end rune) (output SubexAST) { + output = SubexASTEmpty{} // TODO escaping // TODO add arrays, maps and strings loop: for { @@ -148,44 +199,89 @@ func parseValueReplacement(l RuneReader) (output []OutputValueAST) { case eof: panic("Missing closing `") case ' ': - case '`': + case end: break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } - output = append(output, OutputValueLoadAST {slot: slot}) + output = SubexASTConcat { + First: output, + Second: SubexASTOutputValueLoad { + slot: slot, + }, + } + // TODO: destructures + case '#': + if !accept(l, "(") { + panic("Missing ( after #") + } + output = SubexASTConcat { + First: output, + Second: SubexASTDestructure { + Destructure: NoneStructure, + Structure: MapStructure, + Content: parseValueReplacement(l, ')'), + }, + } + if !accept(l, "#") { + panic("Missing # after )") + } + case '"': + output = SubexASTConcat { + First: output, + Second: SubexASTDestructure { + Destructure: NoneStructure, + Structure: StringStructure, + Content: parseRuneReplacement(l, '"'), + }, + } default: l.Rewind() scalar, ok := parseScalarLiteral(l) if !ok { panic("Invalid scalar literal") } - output = append(output, OutputValueLiteralAST {scalar}) + output = SubexASTConcat { + First: output, + Second: SubexASTOutputValueLiteral { + literal: scalar, + }, + } } } return output } -func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) { +func parseRuneReplacement(l RuneReader, end rune) (output SubexAST) { + output = SubexASTEmpty{} // TODO escaping - // TODO add arrays, maps and strings loop: for { r := l.Next() switch r { case eof: panic("Missing closing `") - case '`': + case end: break loop case '$': slot := l.Next() if slot == eof { panic("Missing slot character") } - output = append(output, OutputRuneLoadAST {slot: slot}) + output = SubexASTConcat { + First: output, + Second: SubexASTOutputRuneLoad { + slot: slot, + }, + } default: - output = append(output, OutputRuneLiteralAST {r}) + output = SubexASTConcat { + First: output, + Second: SubexASTOutputRuneLiteral { + literal: r, + }, + } } } return output @@ -270,90 +366,196 @@ func parseRuneReplacement(l RuneReader) (output []OutputRuneAST) { // return parts // } -func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST { - var lhs SubexAST +func parseDestructure(l RuneReader, destructure Structure, inType Type) (lhs SubexAST, outType Type) { + var method rune + switch l.Next() { + case '(': + method = ')' + case '[': + method = ']' + default: + panic("Missing ( or [ after destructure start") + } + + var innerInType Type + var expectedInType Type + switch destructure { + case NoneStructure: + innerInType = inType + expectedInType = inType + case StringStructure: + innerInType = RuneType + expectedInType = ValueType + case ArrayStructure: + innerInType = ValueType + expectedInType = ValueType + case ArrayValuesStructure: + innerInType = ValueType + expectedInType = ValueType + case MapStructure: + innerInType = ValueType + expectedInType = ValueType + default: + panic("Invalid structure") + } + + resolveTypes(inType, expectedInType) + + lhs, innerOutType := parseSubex(l, 0, innerInType) + if !accept(l, string(method)) { + panic("Missing matching ) or ]") + } + + switch method { + case ')': + case ']': + lhs = SubexASTRepeat { + Content: lhs, + Acceptable: []ConvexRange{{ + Start: -1, + End: 0, + }}, + } + default: + panic("Invalid method") + } + + var structure Structure + var expectedInnerOutType Type + r := l.Next() + switch r { + case '-': + structure = NoneStructure + expectedInnerOutType = innerOutType + case '~': + structure = StringStructure + expectedInnerOutType = RuneType + case '@': + structure = ArrayStructure + expectedInnerOutType = ValueType + case ':': + structure = ArrayValuesStructure + expectedInnerOutType = ValueType + case '#': + structure = MapStructure + expectedInnerOutType = ValueType + default: + panic("Missing matching destructure") + } + + innerOutType = resolveTypes(innerOutType, expectedInnerOutType) + + switch structure { + case NoneStructure: + outType = innerOutType + case StringStructure: + outType = ValueType + case ArrayStructure: + outType = ValueType + case ArrayValuesStructure: + outType = ValueType + case MapStructure: + outType = ValueType + } + + lhs = SubexASTDestructure { + Destructure: destructure, + Structure: structure, + Content: lhs, + } + + return lhs, outType +} + +func parseSubex(l RuneReader, minPower int, inType Type) (lhs SubexAST, outType Type) { + start: r := l.Next() switch r { case eof: - return nil + return nil, inType case '(': - lhs = parseSubex(l, 0, inType, outType) + lhs, outType = parseSubex(l, 0, inType) if !accept(l, ")") { panic("Missing matching )") } + case '-': + lhs, outType = parseDestructure(l, NoneStructure, inType) case '~': - if !accept(l, "(") { - panic("Missing ( after ~") - } - lhs = parseSubex(l, 0, RuneType, RuneType) - if !accept(l, ")") { - panic("Missing matching )") - } - if !accept(l, "~") { - panic("Missing matching ~") - } - lhs = SubexASTEnterString {lhs} + lhs, outType = parseDestructure(l, StringStructure, inType) case '@': - if !accept(l, "(") { - panic("Missing ( after @") - } - lhs = parseSubex(l, 0, ValueType, ValueType) - if !accept(l, ")") { - panic("Missing matching )") - } - if !accept(l, "@") { - panic("Missing matching ~") + lhs, outType = parseDestructure(l, ArrayStructure, inType) + case ':': + lhs, outType = parseDestructure(l, ArrayValuesStructure, inType) + case '#': + lhs, outType = parseDestructure(l, MapStructure, inType) + case '"': + if inType == ValueType { + var innerOutType Type + lhs, innerOutType = parseSubex(l, 0, RuneType) + if !accept(l, "\"") { + panic("Missing matching \"") + } + resolveTypes(innerOutType, RuneType) + lhs = SubexASTDestructure { + Destructure: StringStructure, + Structure: StringStructure, + Content: lhs, + } + outType = ValueType + } else { + l.Rewind() + return SubexASTEmpty{}, inType } - lhs = SubexASTEnterArray {lhs} // TODO // case '[': // rangeParts := parseRangeSubex(l) // lhs = SubexASTRange {rangeParts} - case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$': + case ')', ']', '|', ';', '{', '+', '*', '/', '!', '=', '$': l.Rewind() - return SubexASTEmpty{} - // case '=': - // replacement := parseReplacement(l) - // lhs = SubexASTOutput{replacement} - // case '^': - // replacement := parseReplacement(l) - // replacement = append( - // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, - // replacement... - // ) - // replacement = append( - // replacement, - // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, - // ) - // lhs = SubexASTOutput {replacement} + return SubexASTEmpty{}, inType case '.': - if inType != outType { - panic("Copying value changes type!") - } + outType = inType if inType == RuneType { lhs = SubexASTCopyAnyRune{} } else { lhs = SubexASTCopyAnyValue{} } + case ',': + switch inType { + case ValueType: + outType = inType + lhs = SubexASTCopyAnySimpleValue{} + case RuneType: + outType = inType + lhs = SubexASTCopyRune{','} + default: + panic("Invalid inType") + } case '?': + outType = inType lhs = SubexASTCopyBool{} case '%': + outType = inType lhs = SubexASTCopyNumber{} case '`': - lhs = SubexASTOutputValues {parseValueReplacement(l)} - // TODO - // case '_': - // lhs = SubexASTCopyStringAtom{} - // case '#': - // lhs = SubexASTCopyString{} - // case ',': - // lhs = SubexASTCopyValue{} - // case '"': - // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()} - default: - if inType != outType { - panic("inType and outType don't match in copy") + outType = inType + switch inType { + case ValueType: + lhs = parseValueReplacement(l, '`') + case RuneType: + lhs = parseRuneReplacement(l, '`') + default: + panic("Invalid inType") + } + case ' ': + if inType == RuneType { + outType = RuneType + lhs = SubexASTCopyRune {' '} + } else { + goto start } + default: + outType = inType if inType == RuneType { lhs = SubexASTCopyRune {r} } else { @@ -367,8 +569,10 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST } loop: for { if minPower <= 20 { - next := parseSubex(l, 21, inType, outType) + next, outType2 := parseSubex(l, 21, inType) + // TODO: next might legitimately be SubexASTEmpty, e.g. `` if next != nil && (next != SubexASTEmpty{}) { + outType = resolveTypes(outType, outType2) lhs = SubexASTConcat{lhs, next} continue loop } @@ -382,54 +586,58 @@ func parseSubex(l RuneReader, minPower int, inType Type, outType Type) SubexAST } case r == '+' && minPower <= 4: lhs = SubexASTSum {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '*' && minPower <= 4: lhs = SubexASTProduct {lhs} - case r == '-' && minPower <= 4: - lhs = SubexASTNegate {lhs} - // case r == '/' && minPower <= 4: - // lhs = SubexASTReciprocal {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '!' && minPower <= 4: lhs = SubexASTNot {lhs} - // case r == '=' && minPower <= 4: - // lhs = SubexASTEqual {lhs} + resolveTypes(inType, ValueType) + outType = resolveTypes(outType, ValueType) case r == '$' && minPower <= 4: slot := l.Next() if slot == eof { panic("Missing slot character") } if slot == '_' { - lhs = SubexASTDiscard {lhs} + lhs = SubexASTDiscard { + Content: lhs, + InnerOutType: outType, + } } else { - lhs = SubexASTStoreValues { - Match: lhs, - Slot: slot, + if inType == ValueType { + lhs = SubexASTStoreValues { + Match: lhs, + Slot: slot, + } + } else { + lhs = SubexASTStoreRunes { + Match: lhs, + Slot: slot, + } } } + outType = AnyType case r == '|' && minPower <= 8: - rhs := parseSubex(l, 9, inType, outType) + rhs, outType2 := parseSubex(l, 9, inType) + outType = resolveTypes(outType, outType2) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} - /*case r == ';' && minPower <= 10: - rhs := parseSubex(l, 11, inType, outType) - if rhs == nil { - panic("Missing subex after ;") - } - lhs = SubexASTJoin { - Content: lhs, - Delimiter: rhs, - }*/ default: l.Rewind() break loop } } - return lhs + return lhs, outType } func Parse(l RuneReader) SubexAST { - ast := parseSubex(l, 0, ValueType, ValueType) + ast, outType := parseSubex(l, 0, ValueType) + outType = resolveTypes(outType, ValueType) if ast == nil { return SubexASTEmpty{} } diff --git a/subex/subexast.go b/subex/subexast.go index cef853b..655a783 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -46,6 +46,25 @@ func (ast SubexASTStoreValues) String() string { return fmt.Sprintf("$%c(%v)", ast.Slot, ast.Match) } +type SubexASTStoreRunes struct { + Match SubexAST + Slot rune +} +func (ast SubexASTStoreRunes) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + id := slotMap.getRuneId(ast.Slot) + newNext := ast.Match.compileWith(&SubexStoreRunesEndState { + slot: id, + next: next, + }, slotMap, inType, RuneType) + + return &SubexCaptureRunesBeginState { + next: newNext, + } +} +func (ast SubexASTStoreRunes) String() string { + return fmt.Sprintf("(%v)$%c", ast.Match, ast.Slot) +} + // Try to run the first subex, if it fails then backtrack and use the second type SubexASTOr struct { First, Second SubexAST @@ -132,9 +151,6 @@ type SubexASTRepeat struct { Acceptable []ConvexRange } func (ast SubexASTRepeat) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != outType { - panic("Invalid types") - } var state SubexState = &SubexDeadState{} for _, convex := range ast.Acceptable { state = &SubexGroupState {state, convex.compile(ast.Content, next, slotMap, inType, outType)} @@ -151,7 +167,7 @@ type SubexASTCopyScalar struct { } func (ast SubexASTCopyScalar) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyScalar") } return &SubexCopyState{ filter: selectScalarFilter {ast.Scalar}, @@ -165,7 +181,7 @@ func (ast SubexASTCopyScalar) String() string { type SubexASTCopyAnyRune struct {} func (ast SubexASTCopyAnyRune) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != RuneType || outType != RuneType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyAnyRune") } return &SubexCopyRuneState { next: next, @@ -173,7 +189,7 @@ func (ast SubexASTCopyAnyRune) compileWith(next SubexState, slotMap *SlotMap, in } } func (ast SubexASTCopyAnyRune) String() string { - return "." + return ".RUNE" } type SubexASTCopyRune struct { @@ -181,19 +197,22 @@ type SubexASTCopyRune struct { } func (ast SubexASTCopyRune) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != RuneType || outType != RuneType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyRune") } return &SubexCopyRuneState { next: next, filter: selectRuneFilter {ast.rune}, } } +func (ast SubexASTCopyRune) String() string { + return string(ast.rune) +} // Read in a single atom that must be a boolean and output it unchanged type SubexASTCopyBool struct {} func (ast SubexASTCopyBool) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyBool") } return &SubexCopyState { next: next, @@ -208,7 +227,7 @@ func (ast SubexASTCopyBool) String() string { type SubexASTCopyNumber struct {} func (ast SubexASTCopyNumber) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyNumber") } return &SubexCopyState { next: next, @@ -219,11 +238,23 @@ func (ast SubexASTCopyNumber) String() string { return "%" } +// Read in a null, bool, number, string or empty array or map and output it unchanged +type SubexASTCopyAnySimpleValue struct {} +func (ast SubexASTCopyAnySimpleValue) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + if inType != ValueType || outType != ValueType { + panic("Invalid types for SubexASTCopyAnySimpleValue") + } + return &SubexCopyState { + next: next, + filter: simpleValueFilter{}, + } +} + // Read in any single Atom and output it unchanged type SubexASTCopyAnyValue struct {} func (ast SubexASTCopyAnyValue) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTCopyAnyValue") } return &SubexCopyState { next: next, @@ -279,64 +310,56 @@ func (ast SubexASTOutput) String() string { } */ -type OutputValueAST interface { - compile(slotMap *SlotMap) OutputValue +type SubexASTOutputValueLiteral struct { + literal walk.Scalar } - -type OutputValueLoadAST struct { - slot rune -} -func (ast OutputValueLoadAST) compile(slotMap *SlotMap) OutputValue { - return OutputValueLoad { - slotMap.getId(ast.slot), +func (ast SubexASTOutputValueLiteral) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + if outType != ValueType { + panic("Invalid outType for SubexASTOutputValueLiteral") } -} - -type OutputValueLiteralAST struct { - scalar walk.Scalar -} -func (ast OutputValueLiteralAST) compile(slotMap *SlotMap) OutputValue { - return OutputValueLiteral { - ast.scalar, + return &SubexOutputValueLiteralState { + literal: ast.literal, + next: next, } } -type SubexASTOutputValues struct { - Replacement []OutputValueAST +type SubexASTOutputValueLoad struct { + slot rune } -func (ast SubexASTOutputValues) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { +func (ast SubexASTOutputValueLoad) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if outType != ValueType { - panic("Invalid outType") + panic("Invalid outType for SubexASTOutputValueLoad") } - var content []OutputValue - for _, el := range ast.Replacement { - content = append(content, el.compile(slotMap)) - } - return &SubexOutputValuesState { - content: content, + return &SubexOutputValueLoadState { + slot: slotMap.getId(ast.slot), next: next, } } -type OutputRuneAST interface { - compile(slotMap *SlotMap) OutputRune +type SubexASTOutputRuneLiteral struct { + literal rune } - -type OutputRuneLoadAST struct { - slot rune -} -func (ast OutputRuneLoadAST) compile(slotMap *SlotMap) OutputRune { - return OutputRuneLoad {slotMap.getRuneId(ast.slot)} +func (ast SubexASTOutputRuneLiteral) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + if outType != RuneType { + panic("Invalid outType for SubexASTOutputRuneLiteral") + } + return &SubexOutputRuneLiteralState { + literal: ast.literal, + next: next, + } } -type OutputRuneLiteralAST struct { - r rune -} -func (ast OutputRuneLiteralAST) compile (slotMap *SlotMap) OutputRune { - return OutputRuneLiteral {ast.r} +type SubexASTOutputRuneLoad struct { + slot rune } - -type SubexASTOutputRunes struct { +func (ast SubexASTOutputRuneLoad) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + if outType != RuneType { + panic("Invalid outType for SubexASTOutputRuneLoad") + } + return &SubexOutputRuneLoadState { + slot: slotMap.getRuneId(ast.slot), + next: next, + } } // Run each input Atom through a map to produce an output Atom @@ -361,7 +384,7 @@ type SubexASTSum struct { } func (ast SubexASTSum) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTSum") } return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { @@ -380,7 +403,7 @@ type SubexASTProduct struct { } func (ast SubexASTProduct) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTProduct") } return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { @@ -400,7 +423,7 @@ type SubexASTNegate struct { } func (ast SubexASTNegate) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTNot") + panic("Invalid types for SubexASTNegate") } return &SubexCaptureBeginState { next: ast.Content.compileWith(&SubexArithmeticEndState { @@ -446,9 +469,10 @@ func (ast SubexASTEmpty) String() string { // Discards the output from the content subex type SubexASTDiscard struct { Content SubexAST + InnerOutType Type } func (ast SubexASTDiscard) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, outType) + newNext := ast.Content.compileWith(&SubexDiscardState {next}, slotMap, inType, ast.InnerOutType) if inType == ValueType { return &SubexCaptureBeginState { next: newNext, @@ -463,65 +487,170 @@ func (ast SubexASTDiscard) String() string { return fmt.Sprintf("(%v)$_", ast.Content) } -// Go into an array, pass the content each of the values in the array to eat and then leave the array -type SubexASTEnterArray struct { +type SubexASTDestructure struct { + Destructure Structure + Structure Structure Content SubexAST } -func (ast SubexASTEnterArray) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTEnterArray") - } - return &SubexCaptureBeginState { - next: &SubexCopyState { - filter: anyArrayFilter{}, - next: &SubexDiscardState { - next: &SubexIncrementNestState { - next: &SubexCaptureBeginState { - next: ast.Content.compileWith( - &SubexDiscardTerminalState { - terminal: walk.ArrayEnd, - next: &SubexDecrementNestState { - next: &SubexConstructArrayState {next: next}, - }, - }, - slotMap, - ValueType, - ValueType, - ), - }, - }, +func (ast SubexASTDestructure) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { + var innerOutType Type + var construct SubexState + switch ast.Structure { + case NoneStructure: + innerOutType = outType + construct = next + case StringStructure: + innerOutType = RuneType + construct = &SubexConstructStringState { + next: next, + } + case ArrayStructure: + innerOutType = ValueType + construct = &SubexConstructArrayState { + next: next, + } + case ArrayValuesStructure: + innerOutType = ValueType + construct = &SubexConstructArrayValuesState { + next: next, + } + case MapStructure: + innerOutType = ValueType + construct = &SubexConstructMapState { + next: next, + } + default: + panic("Invalid ast structure") + } + + var innerInType Type + var destructFooter SubexState + switch ast.Destructure { + case NoneStructure: + innerInType = inType + destructFooter = construct + case StringStructure: + innerInType = RuneType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.StringEnd, + next: &SubexDecrementNestState { + next: construct, + }, + } + case ArrayStructure: + innerInType = ValueType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.ArrayEnd, + next: &SubexDecrementNestState { + next: construct, }, - }, + } + case ArrayValuesStructure: + innerInType = ValueType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.ArrayEnd, + next: &SubexDecrementNestState { + next: construct, + }, + } + case MapStructure: + innerInType = ValueType + destructFooter = &SubexDiscardTerminalState { + terminal: walk.MapEnd, + next: &SubexDecrementNestState { + next: construct, + }, + } + default: + panic("Invalid ast destructure") } -} -type SubexASTEnterString struct { - Content SubexAST -} -func (ast SubexASTEnterString) compileWith(next SubexState, slotMap *SlotMap, inType Type, outType Type) SubexState { - if inType != ValueType || outType != ValueType { - panic("Invalid types for SubexASTEnterString") + inner := ast.Content.compileWith( + destructFooter, + slotMap, + innerInType, + innerOutType, + ) + + var beginConstruct SubexState + switch ast.Structure { + case NoneStructure: + beginConstruct = inner + case StringStructure: + beginConstruct = &SubexCaptureRunesBeginState { + next: inner, + } + case ArrayStructure: + beginConstruct = &SubexCaptureBeginState { + next: inner, + } + case ArrayValuesStructure: + beginConstruct = &SubexCaptureBeginState { + next: inner, + } + case MapStructure: + beginConstruct = &SubexCaptureBeginState { + next: inner, + } + default: + panic("Invalid ast structure") } - return &SubexCaptureBeginState { - next: &SubexCopyState { - filter: anyStringFilter{}, - next: &SubexDiscardState { - next: &SubexIncrementNestState { - next: &SubexCaptureRunesBeginState { - next: ast.Content.compileWith( - &SubexDiscardTerminalState { - terminal: walk.StringEnd, - next: &SubexDecrementNestState { - next: &SubexConstructStringState {next: next}, - }, - }, - slotMap, - RuneType, - RuneType, - ), + + switch ast.Destructure { + case NoneStructure: + return beginConstruct + case StringStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyStringFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + keys: true, + next: beginConstruct, }, }, }, - }, + } + case ArrayStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyArrayFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + keys: true, + next: beginConstruct, + }, + }, + }, + } + case ArrayValuesStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyArrayFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + keys: false, + next: beginConstruct, + }, + }, + }, + } + case MapStructure: + return &SubexCaptureBeginState { + next: &SubexCopyState { + filter: anyMapFilter{}, + next: &SubexDiscardState { + next: &SubexIncrementNestState { + keys: true, + next: beginConstruct, + }, + }, + }, + } + default: + panic("Invalid destructure in ast") } } +func (ast SubexASTDestructure) String() string { + return fmt.Sprintf("%v(%v)%v", ast.Destructure, ast.Content, ast.Structure) +} diff --git a/subex/subexstate.go b/subex/subexstate.go index 4de8ae2..8f27a10 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -133,126 +133,61 @@ func (state SubexStoreEndState) epsilon(aux auxiliaryState) []SubexBranch { }} } -/* -// A part of an output literal, either an Atom or a slot from which to load -type OutputContent interface { - // Given the current store, return the ValueList produced by the TransducerOutput - buildValues(Store) walk.ValueList - // Given the current store, return the RuneList produced by the TransducerOutput - buildRunes(Store) walk.RuneList -} - -// An OutputContent which is just a Value literal -type OutputValueLiteral struct { - value walk.Value -} -func (replacement OutputValueLiteral) buildValues(store Store) walk.ValueList { - return walk.ValueList{replacement.value} -} -func (replacement OutputValueLiteral) buildRunes(store Store) walk.RuneList { - // TODO: serialise to JSON - panic("Unimplemented!") -} - -// An OutputContent which is just a rune literal -type OutputRuneLiteral struct { - rune walk.StringRuneAtom -} -func (replacement OutputRuneLiteral) buildValues(store Store) walk.ValueList { - // TODO: Try to deserialise - panic("Unimplemented!") -} -func (replacement OutputRuneLiteral) buildRunes(store Store) walk.RuneList { - return walk.RuneList {replacement.rune} -} - -// An OutputContent which is a slot that is loaded from -type OutputLoad struct { +type SubexStoreRunesEndState struct { slot int + next SubexState } -func (replacement OutputLoad) buildValues(store Store) walk.ValueList { - values, isValues := store[replacement.slot].(walk.ValueList) - if !isValues { - panic("Tried to output non-values list") - } - return values -} -func (replacement OutputLoad) buildRunes(store Store) walk.RuneList { - runes, isRunes := store[replacement.slot].(walk.RuneList) - if !isRunes { - panic("Tried to output non-runes as runes") - } - return runes +func (state SubexStoreRunesEndState) epsilon(aux auxiliaryState) []SubexBranch { + toStore, aux := aux.popOutputRunes() + aux.store = aux.store.withRunes(state.slot, toStore) + return []SubexBranch {{ + state: state.next, + aux: aux, + }} } -// Don't read in anything, just output the series of data and slots specified -type SubexOutputState struct { - content []OutputContent +type SubexOutputValueLiteralState struct { + literal walk.Scalar next SubexState } -// Given a store, return what is outputted by an epsilon transition from this state -// TODO: separate into buildValues and buildRunes -func (state SubexOutputState) build(store Store) walk.ValueList { - var result walk.ValueList - for _, part := range state.content { - result = append(result, part.buildValues(store)...) - } - return result -} -func (state SubexOutputState) eat(aux auxiliaryState, char walk.Value) []SubexBranch { - content := state.build(aux.store) - nextStates := state.next.eat(aux.topAppend(content), char) - return nextStates -} -func (state SubexOutputState) accepting(aux auxiliaryState) []OutputStack { - content := state.build(aux.store) - outputStacks := state.next.accepting(aux.topAppend(content)) - return outputStacks -} -*/ - -type OutputValue interface { - build(store Store) []walk.Value +func (state SubexOutputValueLiteralState) epsilon(aux auxiliaryState) []SubexBranch { + return []SubexBranch {{ + state: state.next, + aux: aux.topAppend([]walk.Value {state.literal}), + }} } -type OutputValueLoad struct { +type SubexOutputValueLoadState struct { slot int + next SubexState } -func (ov OutputValueLoad) build(store Store) []walk.Value { - return store.values[ov.slot] -} - -type OutputValueLiteral struct { - scalar walk.Scalar -} -func (ov OutputValueLiteral) build(store Store) []walk.Value { - return []walk.Value{ov.scalar} +func (state SubexOutputValueLoadState) epsilon(aux auxiliaryState) []SubexBranch { + return []SubexBranch {{ + state: state.next, + aux: aux.topAppend(aux.store.values[state.slot]), + }} } -type SubexOutputValuesState struct { - content []OutputValue +type SubexOutputRuneLiteralState struct { + literal rune next SubexState } -func (state SubexOutputValuesState) epsilon(aux auxiliaryState) []SubexBranch { - var content []walk.Value - for _, el := range state.content { - content = append(content, el.build(aux.store)...) - } +func (state SubexOutputRuneLiteralState) epsilon(aux auxiliaryState) []SubexBranch { return []SubexBranch {{ state: state.next, - aux: aux.topAppend(content), + aux: aux.topAppendRune([]rune {state.literal}), }} } -type OutputRune interface { -} - -type OutputRuneLoad struct { +type SubexOutputRuneLoadState struct { slot int + next SubexState } - -type OutputRuneLiteral struct { - r rune +func (state SubexOutputRuneLoadState) epsilon(aux auxiliaryState) []SubexBranch { + return []SubexBranch {{ + state: state.next, + aux: aux.topAppendRune(aux.store.runes[state.slot]), + }} } // A final state, transitions to nothing but is accepting @@ -355,6 +290,49 @@ func (state SubexConstructArrayState) epsilon(aux auxiliaryState) []SubexBranch }} } +type SubexConstructArrayValuesState struct { + next SubexState +} +func (state SubexConstructArrayValuesState) epsilon(aux auxiliaryState) []SubexBranch { + values, aux := aux.popOutput() + var array walk.ArrayValue + for _, v := range values { + array = append(array, walk.ArrayElement { + Index: 0, + Value: v, + }) + } + return []SubexBranch {{ + state: state.next, + aux: aux.topAppend([]walk.Value {array}), + }} +} + +type SubexConstructMapState struct { + next SubexState +} +func (state SubexConstructMapState) epsilon(aux auxiliaryState) []SubexBranch { + values, aux := aux.popOutput() + var m walk.MapValue + if len(values) % 2 != 0 { + panic("Tried to construct array with odd length input") + } + for i := 0; i < len(values); i += 2 { + key, isNum := values[i].(walk.StringValue) + if !isNum { + panic("Tried to construct array with non-numeric index") + } + m = append(m, walk.MapElement { + Key: string(key), + Value: values[i + 1], + }) + } + return []SubexBranch {{ + state: state.next, + aux: aux.topAppend([]walk.Value {m}), + }} +} + type SubexConstructStringState struct { next SubexState } @@ -377,12 +355,14 @@ func (state SubexConstructStringState) String() string { } type SubexIncrementNestState struct { + keys bool next SubexState } func (state SubexIncrementNestState) epsilon(aux auxiliaryState) []SubexBranch { + aux.nesting = append(aux.nesting, state.keys) return []SubexBranch {{ state: state.next, - aux: aux.incNest(), + aux: aux, }} } func (state SubexIncrementNestState) String() string { @@ -393,8 +373,10 @@ type SubexDecrementNestState struct { next SubexState } func (state SubexDecrementNestState) epsilon(aux auxiliaryState) []SubexBranch { + aux.nesting = aux.nesting[:len(aux.nesting) - 1] + // aux.nestingValue will be set in addStates return []SubexBranch {{ state: state.next, - aux: aux.decNest(), + aux: aux, }} } |