From 8cf10efe3b5a1bcc70bc6e5590ee63fd5eb00c5b Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 19 Jul 2023 11:57:59 +0100 Subject: Huge refactor to a more value based system, doing away with terminals. Also introduces unit testing --- subex/main.go | 167 ++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 133 insertions(+), 34 deletions(-) (limited to 'subex/main.go') diff --git a/subex/main.go b/subex/main.go index ebd87cb..e2cec0b 100644 --- a/subex/main.go +++ b/subex/main.go @@ -11,15 +11,15 @@ type Transducer struct { type StoreItem struct {} // Where slots are stored -type Store [][]walk.Atom +type Store []walk.OutputList // Return a new store with all the data from this one func (store Store) clone() Store { - newStore := make([][]walk.Atom, len(store)) + newStore := make([]walk.OutputList, len(store)) copy(newStore, store) return newStore } // Return a copy of this store but with an additional slot set -func (store Store) withValue(key int, value []walk.Atom) Store { +func (store Store) withValue(key int, value walk.OutputList) Store { newStore := store.clone() newStore[key] = value return newStore @@ -46,7 +46,7 @@ func CompileTransducer(transducerAst SubexAST) Transducer { nextId: 0, ids: make(map[rune]int), } - initial := transducerAst.compileWith(&SubexNoneState{}, &slotMap) + initial := transducerAst.compileWith(&SubexNoneState{}, &slotMap, false) return Transducer { storeSize: slotMap.nextId, initialState: initial, @@ -55,54 +55,119 @@ func CompileTransducer(transducerAst SubexAST) Transducer { // An immutable stack for outputting to type OutputStack struct { - head []walk.Atom + head walk.OutputList tail *OutputStack } -func (stack OutputStack) pop() ([]walk.Atom, OutputStack) { +func (stack OutputStack) pop() (walk.OutputList, OutputStack) { return stack.head, *stack.tail } -func (stack OutputStack) push(atoms []walk.Atom) OutputStack { +func (stack OutputStack) push(atoms walk.OutputList) OutputStack { return OutputStack { head: atoms, tail: &stack, } } -func (stack OutputStack) replace(atoms []walk.Atom) OutputStack { +func (stack OutputStack) replace(atoms walk.OutputList) OutputStack { return OutputStack { head: atoms, tail: stack.tail, } } -func (stack OutputStack) peek() []walk.Atom { +func (stack OutputStack) peek() walk.OutputList { return stack.head } -func topAppend(outputStack OutputStack, atoms []walk.Atom) OutputStack { - head := outputStack.peek() - return outputStack.replace(walk.ConcatData(head, atoms)) +func topAppend(outputStack OutputStack, values []walk.Value) OutputStack { + head, isValues := outputStack.peek().(walk.ValueList) + if !isValues { + panic("Tried to append a value to an output of non-values") + } + head = append(walk.ValueList{}, head...) + head = append(head, values...) + return outputStack.replace(head) } -// One branch of subex execution -type SubexBranch struct { +func topAppendRunes(outputStack OutputStack, runes walk.RuneList) OutputStack { + head, isRunes := outputStack.peek().(walk.RuneList) + if !isRunes { + panic("Tried to append runes to a non-rune list") + } + head = append(walk.RuneList{}, head...) + head = append(head, runes...) + return outputStack.replace(head) +} + +// Addition state that goes along with a subex state in an execution branch +type auxiliaryState struct { // Content of slots in this branch store Store - // State in this branch - state SubexState // The output stack. At the end of the program, whatever is on top of this will be output // States may push or pop to the stack as they wish, creating sort of a call stack that allows states to capture the output of other states outputStack OutputStack + // How deeply nested the current execution is inside of the overall value + // i.e. starts at zero, is incremented to one when entering an array + nesting int +} + +func (aux auxiliaryState) cloneStore() auxiliaryState { + aux.store = aux.store.clone() + return aux +} + +func (aux auxiliaryState) withValue(slot int, value walk.OutputList) auxiliaryState { + aux.store = aux.store.withValue(slot, value) + return aux +} + +func (aux auxiliaryState) pushOutput(data walk.OutputList) auxiliaryState { + aux.outputStack = aux.outputStack.push(data) + return aux +} + +func (aux auxiliaryState) popOutput() (walk.OutputList, auxiliaryState) { + data, output := aux.outputStack.pop() + aux.outputStack = output + return data, aux +} + +func (aux auxiliaryState) topAppend(values walk.OutputList) auxiliaryState { + switch output := values.(type) { + case walk.ValueList: + aux.outputStack = topAppend(aux.outputStack, output) + case walk.RuneList: + aux.outputStack = topAppendRunes(aux.outputStack, output) + } + return aux +} + +func (aux auxiliaryState) incNest() auxiliaryState { + aux.nesting++ + return aux +} + +func (aux auxiliaryState) decNest() auxiliaryState { + aux.nesting-- + return aux +} + +// One branch of subex execution +type SubexBranch struct { + // State in this branch + state SubexState + // Axiliary state + aux auxiliaryState } // Read a single character and return all the branches resulting from this branch consuming it -func (pair SubexBranch) eat(char walk.Atom) []SubexBranch { - return pair.state.eat(pair.store, pair.outputStack, char) +func (pair SubexBranch) eat(char walk.Edible) []SubexBranch { + return pair.state.eat(pair.aux, char) } func (pair SubexBranch) accepting() []OutputStack { - return pair.state.accepting(pair.store, pair.outputStack) + return pair.state.accepting(pair.aux) } func equalStates(left SubexBranch, right SubexBranch) bool { // Only care about if they are the same pointer - return left.state == right.state + return left.state == right.state && left.aux.nesting == right.aux.nesting } // If two branches have the same state, only the first has a chance of being successful @@ -121,33 +186,67 @@ func pruneStates(states []SubexBranch) []SubexBranch { return states[:uniqueStates] } -// Run the subex transducer -func RunTransducer(transducer Transducer, input []walk.Atom) (output []walk.Atom, err bool) { - states := []SubexBranch{{ - state: transducer.initialState, - outputStack: OutputStack { - head: nil, - tail: nil, - }, - store: make([][]walk.Atom, transducer.storeSize), - }} +func processInput(states []SubexBranch, input walk.StructureIter, nesting int) []SubexBranch { + if len(states) == 0 { + return states + } var tmp []SubexBranch newStates := make([]SubexBranch, 0, 2) - for _, piece := range input { + for { + piece := input.Next() + if piece == nil { + break + } + + // TODO: break if there are no states at this nesting level left + // TODO: What to do if there are remaining nested states after all pieces have been used? for _, state := range states { - newStates = append(newStates, state.eat(piece)...) + if state.aux.nesting == nesting { + newStates = append(newStates, state.eat(piece)...) + } else { + newStates = append(newStates, state) + } } + + structure, isStructure := piece.(walk.Structure) + if isStructure { + iter := structure.Iter() + newStates = processInput(newStates, iter, nesting + 1) + } + tmp = states states = pruneStates(newStates) newStates = tmp[:0] if len(states) == 0 { - return nil, true + return states } } + return states +} + +// Run the subex transducer +func RunTransducer(transducer Transducer, input walk.ValueList) (output []walk.Value, err bool) { + states := []SubexBranch{{ + state: transducer.initialState, + aux: auxiliaryState { + outputStack: OutputStack { + head: walk.ValueList{}, + tail: nil, + }, + store: make([]walk.OutputList, transducer.storeSize), + nesting: 0, + }, + }} + + states = processInput(states, walk.NewValueIter(input), 0) + for _, state := range states { acceptingStacks := state.accepting() for _, stack := range acceptingStacks { - output := stack.head + output, isValues := stack.head.(walk.ValueList) + if !isValues { + panic("Tried to output a non-values list") + } return output, false } } -- cgit v1.2.3