From a2636b27fdadb2b7951fa35fe301e8e6b41fc28a Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 22 Feb 2023 20:52:20 +0000 Subject: Modify subex to take JSON split into "data" Currently no way to reassemble the data on the other side Much of the potential data cannot be interacted with meaningfully, only the string functionality is implemented Should rename data to something else --- subex/main.go | 89 ++++++++++++++++++++++++++++++++++++----------------- subex/parse.go | 41 ++++++++++++------------ subex/subexast.go | 9 ++++-- subex/subexstate.go | 83 +++++++++++++++++++++++++++++-------------------- 4 files changed, 137 insertions(+), 85 deletions(-) (limited to 'subex') diff --git a/subex/main.go b/subex/main.go index 3ae0618..9dbe5df 100644 --- a/subex/main.go +++ b/subex/main.go @@ -3,24 +3,36 @@ package subex import ( "os" "fmt" - "io" + "bufio" + "main/walk" ) +// A part of an insertion, either a datum or a slot from which to load +// TODO rename this type TransducerOutput interface { - build(Store) string + // Given the current store, return the []Datum produced by the TransducerOutput + build(Store) []walk.Datum } -type TransducerReplacementRune rune -func (replacement TransducerReplacementRune) build(store Store) string { - return string(replacement) +// A TransducerOutput which is just a datum literal +type TransducerReplacementRune struct { + datum walk.Datum +} +func (replacement TransducerReplacementRune) build(store Store) []walk.Datum { + return []walk.Datum{replacement.datum} } -type TransducerReplacementLoad rune -func (replacement TransducerReplacementLoad) build(store Store) string { - return store[rune(replacement)] +// A TransducerOutput which is a slot that is loaded from +type TransducerReplacementLoad struct { + datum walk.Datum +} +func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum { + return store[replacement.datum] } -type Store map[rune]string +// Where slots are stored +type Store map[walk.Datum][]walk.Datum +// Return a new store with all the data from this one func (store Store) clone() Store { newStore := make(Store) for key, val := range store { @@ -28,29 +40,36 @@ func (store Store) clone() Store { } return newStore } -func (store Store) withValue(key rune, value string) Store { +// Return a copy of this store but with an additional slot set +func (store Store) withValue(key walk.Datum, value []walk.Datum) Store { newStore := store.clone() newStore[key] = value return newStore } +// Compile the SubexAST into a transducer SubexState that can be run func CompileTransducer(transducerAst SubexAST) SubexState { return transducerAst.compileWith(SubexNoneState{}) } +// One branch of subex execution type SubexBranch struct { + // Content of slots in this branch store Store + // State in this branch state SubexState - output string + // Output so far in this branch + output []walk.Datum } -func (pair SubexBranch) eat(char rune) []SubexBranch { +// Read a single character and return all the branches resulting from this branch consuming it +func (pair SubexBranch) eat(char walk.Datum) []SubexBranch { states := pair.state.eat(pair.store, char) for i := range states { - states[i].output = pair.output + states[i].output + states[i].output = append(pair.output, states[i].output...) } return states } -func (pair SubexBranch) accepting() []string { +func (pair SubexBranch) accepting() [][]walk.Datum { return pair.state.accepting(pair.store) } @@ -59,6 +78,8 @@ func equalStates(left SubexBranch, right SubexBranch) bool { return left.state == right.state } +// If two branches have the same state, only the first has a chance of being successful +// This function removes all of the pointless execution branches to save execution time func pruneStates(states []SubexBranch) (newStates []SubexBranch) { outer: for _, state := range states { for _, newState := range newStates { @@ -71,44 +92,54 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) { return newStates } -func RunTransducer(transducer SubexState, input string) (output string, err bool) { +// Run the subex transducer +func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) { states := []SubexBranch{{ state: transducer, - output: "", + output: nil, store: make(Store), }} - for _, char := range input { - fmt.Printf("Running with %d states\n", len(states)) + for piece := range input { var newStates []SubexBranch for _, state := range states { - newStates = append(newStates, state.eat(char)...) + newStates = append(newStates, state.eat(piece)...) } states = pruneStates(newStates) } for _, state := range states { outputEnds := state.accepting() for _, outputEnd := range outputEnds { - return state.output + outputEnd, false + return append(state.output, outputEnd...), false } } - return "", true + return nil, true } func Main() { if len(os.Args) != 2 { panic("Expected: program [subex]") } - inputBytes, inputErr := io.ReadAll(os.Stdin) - input := string(inputBytes) - if inputErr != nil { - fmt.Println("Error reading") + stdin := bufio.NewReader(os.Stdin); + jsonStream := walk.Json(stdin); + var tokens []walk.WalkValue; + for token := range jsonStream { + tokens = append(tokens, token.Value); } program := os.Args[1] ast := Parse(program) transducer := CompileTransducer(ast) - output, err := RunTransducer(transducer, input) - if err { - output = input + pieces := make(chan walk.Datum) + go func(out chan<- walk.Datum, input []walk.WalkValue) { + for _, value := range input { + value.Pieces(out) + } + close(out) + }(pieces, tokens) + output, err := RunTransducer(transducer, pieces) + // TODO recombine data into values and then convert into items with empty paths + if !err { + fmt.Print(output) + } else { + fmt.Print("Error") } - fmt.Print(output) } diff --git a/subex/parse.go b/subex/parse.go index af575eb..0c79dc3 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -1,5 +1,9 @@ package subex +import ( + "main/walk" +) + func parseReplacement(l *RuneReader) (output []TransducerOutput) { loop: for { r := l.next() @@ -13,17 +17,18 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) { if slot == eof { panic("Missing slot character") } - output = append(output, TransducerReplacementLoad(slot)) + output = append(output, TransducerReplacementLoad{datum: slot}) default: - output = append(output, TransducerReplacementRune(r)) + output = append(output, TransducerReplacementRune{datum: r}) } } return output } -func parseRangeSubex(l *RuneReader) map[rune]rune { - parts := make(map[rune]rune) - var froms []rune +// Parse the contents of a range subex [] into a map +func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { + parts := make(map[walk.Datum]walk.Datum) + var froms []walk.Datum var hasTo bool for { fromsStart := l.next() @@ -34,43 +39,41 @@ func parseRangeSubex(l *RuneReader) map[rune]rune { hasTo = true break } - var fromsEnd rune if l.accept("-") { - fromsEnd = l.next() + fromsEnd := l.next() if fromsEnd == ']' || fromsEnd == '=' { l.rewind() fromsEnd = fromsStart } + for i := fromsStart; i <= fromsEnd; i += 1 { + froms = append(froms, i) + } } else { - fromsEnd = fromsStart - } - for i := fromsStart; i <= fromsEnd; i += 1 { - froms = append(froms, i) + froms = append(froms, fromsStart) } } if len(froms) == 0 { panic("Missing from part of range expression") } - var tos []rune + var tos []walk.Datum if hasTo { for { tosStart := l.next() if tosStart == ']' { break } - var tosEnd rune if l.accept("-") { - tosEnd = l.next() + tosEnd := l.next() if tosEnd == ']' { l.rewind() tosEnd = tosStart } + for i := tosStart; i <= tosEnd; i += 1 { + tos = append(tos, i) + } } else { - tosEnd = tosStart - } - for i := tosStart; i <= tosEnd; i += 1 { - tos = append(tos, i) + tos = append(tos, tosStart) } } } else { @@ -122,7 +125,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case '.': lhs = SubexASTCopyAny{} default: - lhs = SubexASTCopyRune(r) + lhs = SubexASTCopyRune{datum: r} } loop: for { if minPower <= 0 { diff --git a/subex/subexast.go b/subex/subexast.go index c583245..0afd3e3 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -2,6 +2,7 @@ package subex import ( "fmt" + "main/walk" ) type SubexAST interface { @@ -90,10 +91,12 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { return next } -type SubexASTCopyRune rune +type SubexASTCopyRune struct { + datum walk.Datum +} func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { return &SubexCopyRuneState{ - rune: rune(ast), + rune: ast.datum, next: next, } } @@ -153,7 +156,7 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState { } type SubexASTRange struct { - parts map[rune]rune + parts map[walk.Datum]walk.Datum } func (ast SubexASTRange) compileWith(next SubexState) SubexState { return &SubexRangeState { diff --git a/subex/subexstate.go b/subex/subexstate.go index 2e613e8..9e0d61a 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -1,35 +1,41 @@ package subex import ( - "strings" + "main/walk" ) +// A state of execution for the transducer type SubexState interface { - eat(store Store, char rune) []SubexBranch - accepting(store Store) []string + // Eat a datum and transition to any number of new states + eat(store Store, char walk.Datum) []SubexBranch + // Find accepting states reachable through epsilon transitions and return their outputs + accepting(store Store) [][]walk.Datum } +// Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, char rune) []SubexBranch { +func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch { otherStore := store.clone() return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) } -func (state SubexGroupState) accepting(store Store) []string { +func (state SubexGroupState) accepting(store Store) [][]walk.Datum { return append(state.first.accepting(store), state.second.accepting(store)...) } +// Run the match machine and store the output in a slot for later use +// Output nothing type SubexStoreState struct { match SubexState slot rune next SubexState - toStore string + toStore []walk.Datum } -func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) { +func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { - nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...)) nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...) } nextMatchStates := state.match.eat(store.clone(), char) @@ -39,107 +45,116 @@ func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBran match: matchState.state, slot: state.slot, next: state.next, - toStore: state.toStore + matchState.output, + toStore: append(state.toStore, matchState.output...), }, - output: "", + output: nil, store: store.clone(), }) } return nextStates } -func (state SubexStoreState) accepting(store Store) (outputs []string) { +func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { - nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...)) outputs = append(outputs, state.next.accepting(nextStore)...) } return outputs } +// Don't read in anything, just output the series of data and slots specified type SubexOutputState struct { content []TransducerOutput next SubexState } -func (state SubexOutputState) build(store Store) string { - var builder strings.Builder +// Given a store, return what is outputted by an epsilon transition from this state +func (state SubexOutputState) build(store Store) []walk.Datum { + var result []walk.Datum for _, part := range state.content { - builder.WriteString(part.build(store)) + result = append(result, part.build(store)...) } - return builder.String() + return result } -func (state SubexOutputState) eat(store Store, char rune) []SubexBranch { +func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch { content := state.build(store) nextStates := state.next.eat(store, char) for i := range nextStates { - nextStates[i].output = content + nextStates[i].output + nextStates[i].output = append(content, nextStates[i].output...) } return nextStates } -func (state SubexOutputState) accepting(store Store) []string { +func (state SubexOutputState) accepting(store Store) [][]walk.Datum { content := state.build(store) outputs := state.next.accepting(store) for i := range outputs { - outputs[i] = content + outputs[i] + outputs[i] = append(content, outputs[i]...) } return outputs } +// A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char rune) []SubexBranch { +func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store) []string { - return []string{""} +func (state SubexNoneState) accepting(store Store) [][]walk.Datum { + return [][]walk.Datum{nil} } +// Read in a specific datum and output it +// TODO rename to better reflect datum instead of rune type SubexCopyRuneState struct { - rune rune + rune walk.Datum next SubexState } -func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch { +func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch { + // TODO can I compare Datum values with == ? if char == state.rune { return []SubexBranch{{ state: state.next, - output: string(char), + output: []walk.Datum{char}, store: store, }} } return nil } -func (state SubexCopyRuneState) accepting(store Store) []string { +func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum { return nil } +// Read in any datum and output it type SubexCopyAnyState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch { return []SubexBranch{{ state: state.next, - output: string(char), + output: []walk.Datum{char}, store: store, }} } -func (state SubexCopyAnyState) accepting(store Store) []string { +func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum { return nil } +// Read in a datum and apply a map to generate a datum to output +// If the input isn't in the map transition to nothing type SubexRangeState struct { - parts map[rune]rune + parts map[walk.Datum]walk.Datum next SubexState } -func (state SubexRangeState) eat(store Store, char rune) []SubexBranch { +func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch { out, exists := state.parts[char] if !exists { return nil } else { return []SubexBranch{{ state: state.next, - output: string(out), + output: []walk.Datum{out}, store: store, }} } } -func (state SubexRangeState) accepting(store Store) []string { +func (state SubexRangeState) accepting(store Store) [][]walk.Datum { return nil } -- cgit v1.2.3