From 184368ae155fcdd50dde5c2e4b0c87e0d69acdd7 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 19 Apr 2023 11:18:22 +0100 Subject: Replaces the parent/child implementation for operators like store and sum with an output stack Previously a store state was a parent of another state machine that it would run inside of itself in order to capture the output to be stored. This was limited as the greedyness of the child would not be transferred to the parent. The new implementation gives states more control over the output state and turns it into a stack. By pushing to the stack before the child and popping afterwards, all of the child's output can be retrieved while the child is very much part of the complete machine. --- subex/subexstate.go | 211 ++++++++++++++++------------------------------------ 1 file changed, 63 insertions(+), 148 deletions(-) (limited to 'subex/subexstate.go') diff --git a/subex/subexstate.go b/subex/subexstate.go index 855d44a..dbc8340 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -9,106 +9,48 @@ import ( // A state of execution for the transducer type SubexState interface { // Eat a Atom and transition to any number of new states - eat(store Store, char walk.Atom) []SubexBranch + eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch // Find accepting states reachable through epsilon transitions and return their outputs - accepting(store Store) [][]walk.Atom -} - -type SubexParentState interface { - SubexState - // Get the child - child() SubexState - // The child outputted output, what should be passed as accumulator data into the next version of the parent state - nextAcc(output []walk.Atom) interface{} - // Given the final accumulated data, run the next state after the parent, immutably borrows store - feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch - // Given the final accumulated data, get the accepted outputs from the next state, immutably borrows store - acceptNext(acc interface{}, store Store) [][]walk.Atom - // Given the next child and next accumulator data, generate the next parent - nextParent(child SubexState, acc interface{}) SubexState + accepting(store Store, outputStack OutputStack) []OutputStack } // Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexGroupState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { otherStore := store.clone() - return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) + return append(state.first.eat(store, outputStack, char), state.second.eat(otherStore, outputStack, char)...) } -func (state SubexGroupState) accepting(store Store) [][]walk.Atom { - return append(state.first.accepting(store), state.second.accepting(store)...) +func (state SubexGroupState) accepting(store Store, outputStack OutputStack) []OutputStack { + return append(state.first.accepting(store, outputStack), state.second.accepting(store, outputStack)...) } -// Helper so states that are actually collections of states distinguished by a child state -// can pass eaten characters on to their children more easily -func feedChild(parent SubexParentState, store Store, char walk.Atom) (nextStates []SubexBranch) { - child := parent.child() - accepteds := child.accepting(store) - for _, accepted := range accepteds { - acc := parent.nextAcc(accepted) - nextStates = append(nextStates, parent.feedNext(acc, store, char)...) - } - nextChildren := child.eat(store, char) - for _, nextChild := range nextChildren { - acc := parent.nextAcc(nextChild.output) - nextStates = append(nextStates, SubexBranch{ - state: parent.nextParent(nextChild.state, acc), - output: nil, - store: nextChild.store, - }) - } - return nextStates +// Push an empty value onto the OutputStack and epsilon transition to next +// This value will be added to until SubexStoreEndState is reached when it will be stored +type SubexStoreBeginState struct { + next SubexState } - -func acceptChild(parent SubexParentState, store Store) (outputs [][]walk.Atom) { - child := parent.child() - accepteds := child.accepting(store) - for _, accepted := range accepteds { - acc := parent.nextAcc(accepted) - outputs = append(outputs, parent.acceptNext(acc, store)...) - } - return outputs +func (state SubexStoreBeginState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + return state.next.eat(store, outputStack.push(nil), char) +} +func (state SubexStoreBeginState) accepting(store Store, outputStack OutputStack) []OutputStack { + return state.next.accepting(store, outputStack.push(nil)) } -// Run the match machine and store the output in a slot for later use -// Output nothing -type SubexStoreState struct { - match SubexState +// Pop the top of the OutputStack which contains the stuff outputted since the start of the store +// This outputted data gets stored in a slot +type SubexStoreEndState struct { slot rune next SubexState - toStore []walk.Atom -} -func (state SubexStoreState) child() SubexState { - return state.match -} -func (state SubexStoreState) nextAcc(output []walk.Atom) interface{} { - return walk.ConcatData(state.toStore, output) -} -func (state SubexStoreState) feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch { - toStore := acc.([]walk.Atom) - nextStore := store.withValue(state.slot, toStore) - return state.next.eat(nextStore, char) -} -func (state SubexStoreState) acceptNext(acc interface{}, store Store) [][]walk.Atom { - toStore := acc.([]walk.Atom) - nextStore := store.withValue(state.slot, toStore) - return state.next.accepting(nextStore) -} -func (state SubexStoreState) nextParent(match SubexState, acc interface{}) SubexState { - toStore := acc.([]walk.Atom) - return &SubexStoreState { - match: match, - slot: state.slot, - next: state.next, - toStore: toStore, - } } -func (state SubexStoreState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) { - return feedChild(state, store, char) +func (state SubexStoreEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + toStore, newStack := outputStack.pop() + return state.next.eat(store.withValue(state.slot, toStore), newStack, char) } -func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Atom) { - return acceptChild(state, store) +func (state SubexStoreEndState) accepting(store Store, outputStack OutputStack) []OutputStack { + toStore, newStack := outputStack.pop() + return state.next.accepting(store.withValue(state.slot, toStore), newStack) } // A part of an output literal, either an Atom or a slot from which to load @@ -146,38 +88,32 @@ func (state SubexOutputState) build(store Store) []walk.Atom { } return result } -func (state SubexOutputState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexOutputState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { content := state.build(store) - nextStates := state.next.eat(store, char) - for i := range nextStates { - nextStates[i].output = walk.ConcatData(content, nextStates[i].output) - } + nextStates := state.next.eat(store, topAppend(outputStack, content), char) return nextStates } -func (state SubexOutputState) accepting(store Store) [][]walk.Atom { +func (state SubexOutputState) accepting(store Store, outputStack OutputStack) []OutputStack { content := state.build(store) - outputs := state.next.accepting(store) - for i := range outputs { - outputs[i] = walk.ConcatData(content, outputs[i]) - } - return outputs + outputStacks := state.next.accepting(store, topAppend(outputStack, content)) + return outputStacks } // A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexNoneState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store) [][]walk.Atom { - return [][]walk.Atom{nil} +func (state SubexNoneState) accepting(store Store, outputStack OutputStack) []OutputStack { + return []OutputStack{outputStack} } // A dead end state, handy for making internals work nicer but technically redundant type SubexDeadState struct {} -func (state SubexDeadState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexDeadState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return nil } -func (state SubexDeadState) accepting (store Store) [][]walk.Atom { +func (state SubexDeadState) accepting (store Store, outputStack OutputStack) []OutputStack { return nil } @@ -186,18 +122,18 @@ type SubexCopyAtomState struct { atom walk.Atom next SubexState } -func (state SubexCopyAtomState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexCopyAtomState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { // TODO can I compare Atom values with == ? if char == state.atom { return []SubexBranch{{ state: state.next, - output: []walk.Atom{char}, + outputStack: topAppend(outputStack, []walk.Atom{char}), store: store, }} } return nil } -func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom { +func (state SubexCopyAtomState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -205,14 +141,14 @@ func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom { type SubexCopyAnyState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { return []SubexBranch{{ state: state.next, - output: []walk.Atom{char}, + outputStack: topAppend(outputStack, []walk.Atom{char}), store: store, }} } -func (state SubexCopyAnyState) accepting(store Store) [][]walk.Atom { +func (state SubexCopyAnyState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -222,19 +158,19 @@ type SubexRangeState struct { parts map[walk.Atom]walk.Atom next SubexState } -func (state SubexRangeState) eat(store Store, char walk.Atom) []SubexBranch { +func (state SubexRangeState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { out, exists := state.parts[char] if !exists { return nil } else { return []SubexBranch{{ state: state.next, - output: []walk.Atom{out}, + outputStack: topAppend(outputStack, []walk.Atom{out}), store: store, }} } } -func (state SubexRangeState) accepting(store Store) [][]walk.Atom { +func (state SubexRangeState) accepting(store Store, outputStack OutputStack) []OutputStack { return nil } @@ -267,56 +203,35 @@ func sumValues(atoms []walk.Atom) (walk.ValueNumber, error) { return walk.ValueNumber(sum), nil } -// Run the inputState machine and sum any values output, output the sum -// Cast non numbers into numbers, branch dies if it is not castable -type SubexSumState struct { - inputState SubexState +// At the start of a sum, just pushes to the OutputStack allowing the end to capture what was output in the middle +// Tries to cast values to numbers to sum them and rejects if values are not castable +type SubexSumBeginState struct { next SubexState - acc []walk.Atom } -func (state SubexSumState) child() SubexState { - return state.inputState +func (state SubexSumBeginState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + return state.next.eat(store, outputStack.push(nil), char) } -func (state SubexSumState) nextAcc(output []walk.Atom) interface{} { - return walk.ConcatData(state.acc, output) +func (state SubexSumBeginState) accepting(store Store, outputStack OutputStack) []OutputStack { + return state.next.accepting(store, outputStack.push(nil)) } -func (state SubexSumState) feedNext(acc interface{}, store Store, char walk.Atom) []SubexBranch { - childOutput := acc.([]walk.Atom) - total, err := sumValues(childOutput) + +// At the end of a sum, pops what has been output since the start, sums and outputs it +type SubexSumEndState struct { + next SubexState +} +func (state SubexSumEndState) eat(store Store, outputStack OutputStack, char walk.Atom) []SubexBranch { + toSum, newStack := outputStack.pop() + sum, err := sumValues(toSum) if err != nil { return nil } - output := []walk.Atom{total} - nextStates := state.next.eat(store.clone(), char) - for i := range nextStates { - nextStates[i].output = walk.ConcatData(output, nextStates[i].output) - } - return nextStates + return state.next.eat(store, topAppend(newStack, []walk.Atom{sum}), char) } -func (state SubexSumState) acceptNext(acc interface{}, store Store) [][]walk.Atom { - childOutput := acc.([]walk.Atom) - total, err := sumValues(childOutput) +func (state SubexSumEndState) accepting(store Store, outputStack OutputStack) []OutputStack { + toSum, newStack := outputStack.pop() + sum, err := sumValues(toSum) if err != nil { return nil } - output := []walk.Atom{total} - outputs := state.next.accepting(store.clone()) - for i := range outputs { - outputs[i] = walk.ConcatData(output, outputs[i]) - } - return outputs -} -func (state SubexSumState) nextParent(child SubexState, acc interface{}) SubexState { - childOutput := acc.([]walk.Atom) - return &SubexSumState { - inputState: child, - next: state.next, - acc: childOutput, - } -} -func (state SubexSumState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) { - return feedChild(state, store, char) -} -func (state SubexSumState) accepting(store Store) (outputs [][]walk.Atom) { - return acceptChild(state, store) + return state.next.accepting(store, topAppend(newStack, []walk.Atom{sum})) } -- cgit v1.2.3