<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
path: root/subex
diff options
context:
space:
mode:
authorCharlie Stanton <charlie@shtanton.xyz>2023-02-22 20:52:20 +0000
committerCharlie Stanton <charlie@shtanton.xyz>2023-02-22 20:52:20 +0000
commita2636b27fdadb2b7951fa35fe301e8e6b41fc28a (patch)
tree35561560d067c9987a626c4a77ea3f688547a015 /subex
parent3bd45dc49a35b82dcc4ae93796c3e152d799bc0b (diff)
downloadstred-go-a2636b27fdadb2b7951fa35fe301e8e6b41fc28a.tar
Modify subex to take JSON split into "data"
Currently no way to reassemble the data on the other side Much of the potential data cannot be interacted with meaningfully, only the string functionality is implemented Should rename data to something else
Diffstat (limited to 'subex')
-rw-r--r--subex/main.go89
-rw-r--r--subex/parse.go41
-rw-r--r--subex/subexast.go9
-rw-r--r--subex/subexstate.go83
4 files changed, 137 insertions, 85 deletions
diff --git a/subex/main.go b/subex/main.go
index 3ae0618..9dbe5df 100644
--- a/subex/main.go
+++ b/subex/main.go
@@ -3,24 +3,36 @@ package subex
import (
"os"
"fmt"
- "io"
+ "bufio"
+ "main/walk"
)
+// A part of an insertion, either a datum or a slot from which to load
+// TODO rename this
type TransducerOutput interface {
- build(Store) string
+ // Given the current store, return the []Datum produced by the TransducerOutput
+ build(Store) []walk.Datum
}
-type TransducerReplacementRune rune
-func (replacement TransducerReplacementRune) build(store Store) string {
- return string(replacement)
+// A TransducerOutput which is just a datum literal
+type TransducerReplacementRune struct {
+ datum walk.Datum
+}
+func (replacement TransducerReplacementRune) build(store Store) []walk.Datum {
+ return []walk.Datum{replacement.datum}
}
-type TransducerReplacementLoad rune
-func (replacement TransducerReplacementLoad) build(store Store) string {
- return store[rune(replacement)]
+// A TransducerOutput which is a slot that is loaded from
+type TransducerReplacementLoad struct {
+ datum walk.Datum
+}
+func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum {
+ return store[replacement.datum]
}
-type Store map[rune]string
+// Where slots are stored
+type Store map[walk.Datum][]walk.Datum
+// Return a new store with all the data from this one
func (store Store) clone() Store {
newStore := make(Store)
for key, val := range store {
@@ -28,29 +40,36 @@ func (store Store) clone() Store {
}
return newStore
}
-func (store Store) withValue(key rune, value string) Store {
+// Return a copy of this store but with an additional slot set
+func (store Store) withValue(key walk.Datum, value []walk.Datum) Store {
newStore := store.clone()
newStore[key] = value
return newStore
}
+// Compile the SubexAST into a transducer SubexState that can be run
func CompileTransducer(transducerAst SubexAST) SubexState {
return transducerAst.compileWith(SubexNoneState{})
}
+// One branch of subex execution
type SubexBranch struct {
+ // Content of slots in this branch
store Store
+ // State in this branch
state SubexState
- output string
+ // Output so far in this branch
+ output []walk.Datum
}
-func (pair SubexBranch) eat(char rune) []SubexBranch {
+// Read a single character and return all the branches resulting from this branch consuming it
+func (pair SubexBranch) eat(char walk.Datum) []SubexBranch {
states := pair.state.eat(pair.store, char)
for i := range states {
- states[i].output = pair.output + states[i].output
+ states[i].output = append(pair.output, states[i].output...)
}
return states
}
-func (pair SubexBranch) accepting() []string {
+func (pair SubexBranch) accepting() [][]walk.Datum {
return pair.state.accepting(pair.store)
}
@@ -59,6 +78,8 @@ func equalStates(left SubexBranch, right SubexBranch) bool {
return left.state == right.state
}
+// If two branches have the same state, only the first has a chance of being successful
+// This function removes all of the pointless execution branches to save execution time
func pruneStates(states []SubexBranch) (newStates []SubexBranch) {
outer: for _, state := range states {
for _, newState := range newStates {
@@ -71,44 +92,54 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) {
return newStates
}
-func RunTransducer(transducer SubexState, input string) (output string, err bool) {
+// Run the subex transducer
+func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) {
states := []SubexBranch{{
state: transducer,
- output: "",
+ output: nil,
store: make(Store),
}}
- for _, char := range input {
- fmt.Printf("Running with %d states\n", len(states))
+ for piece := range input {
var newStates []SubexBranch
for _, state := range states {
- newStates = append(newStates, state.eat(char)...)
+ newStates = append(newStates, state.eat(piece)...)
}
states = pruneStates(newStates)
}
for _, state := range states {
outputEnds := state.accepting()
for _, outputEnd := range outputEnds {
- return state.output + outputEnd, false
+ return append(state.output, outputEnd...), false
}
}
- return "", true
+ return nil, true
}
func Main() {
if len(os.Args) != 2 {
panic("Expected: program [subex]")
}
- inputBytes, inputErr := io.ReadAll(os.Stdin)
- input := string(inputBytes)
- if inputErr != nil {
- fmt.Println("Error reading")
+ stdin := bufio.NewReader(os.Stdin);
+ jsonStream := walk.Json(stdin);
+ var tokens []walk.WalkValue;
+ for token := range jsonStream {
+ tokens = append(tokens, token.Value);
}
program := os.Args[1]
ast := Parse(program)
transducer := CompileTransducer(ast)
- output, err := RunTransducer(transducer, input)
- if err {
- output = input
+ pieces := make(chan walk.Datum)
+ go func(out chan<- walk.Datum, input []walk.WalkValue) {
+ for _, value := range input {
+ value.Pieces(out)
+ }
+ close(out)
+ }(pieces, tokens)
+ output, err := RunTransducer(transducer, pieces)
+ // TODO recombine data into values and then convert into items with empty paths
+ if !err {
+ fmt.Print(output)
+ } else {
+ fmt.Print("Error")
}
- fmt.Print(output)
}
diff --git a/subex/parse.go b/subex/parse.go
index af575eb..0c79dc3 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -1,5 +1,9 @@
package subex
+import (
+ "main/walk"
+)
+
func parseReplacement(l *RuneReader) (output []TransducerOutput) {
loop: for {
r := l.next()
@@ -13,17 +17,18 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) {
if slot == eof {
panic("Missing slot character")
}
- output = append(output, TransducerReplacementLoad(slot))
+ output = append(output, TransducerReplacementLoad{datum: slot})
default:
- output = append(output, TransducerReplacementRune(r))
+ output = append(output, TransducerReplacementRune{datum: r})
}
}
return output
}
-func parseRangeSubex(l *RuneReader) map[rune]rune {
- parts := make(map[rune]rune)
- var froms []rune
+// Parse the contents of a range subex [] into a map
+func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {
+ parts := make(map[walk.Datum]walk.Datum)
+ var froms []walk.Datum
var hasTo bool
for {
fromsStart := l.next()
@@ -34,43 +39,41 @@ func parseRangeSubex(l *RuneReader) map[rune]rune {
hasTo = true
break
}
- var fromsEnd rune
if l.accept("-") {
- fromsEnd = l.next()
+ fromsEnd := l.next()
if fromsEnd == ']' || fromsEnd == '=' {
l.rewind()
fromsEnd = fromsStart
}
+ for i := fromsStart; i <= fromsEnd; i += 1 {
+ froms = append(froms, i)
+ }
} else {
- fromsEnd = fromsStart
- }
- for i := fromsStart; i <= fromsEnd; i += 1 {
- froms = append(froms, i)
+ froms = append(froms, fromsStart)
}
}
if len(froms) == 0 {
panic("Missing from part of range expression")
}
- var tos []rune
+ var tos []walk.Datum
if hasTo {
for {
tosStart := l.next()
if tosStart == ']' {
break
}
- var tosEnd rune
if l.accept("-") {
- tosEnd = l.next()
+ tosEnd := l.next()
if tosEnd == ']' {
l.rewind()
tosEnd = tosStart
}
+ for i := tosStart; i <= tosEnd; i += 1 {
+ tos = append(tos, i)
+ }
} else {
- tosEnd = tosStart
- }
- for i := tosStart; i <= tosEnd; i += 1 {
- tos = append(tos, i)
+ tos = append(tos, tosStart)
}
}
} else {
@@ -122,7 +125,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
case '.':
lhs = SubexASTCopyAny{}
default:
- lhs = SubexASTCopyRune(r)
+ lhs = SubexASTCopyRune{datum: r}
}
loop: for {
if minPower <= 0 {
diff --git a/subex/subexast.go b/subex/subexast.go
index c583245..0afd3e3 100644
--- a/subex/subexast.go
+++ b/subex/subexast.go
@@ -2,6 +2,7 @@ package subex
import (
"fmt"
+ "main/walk"
)
type SubexAST interface {
@@ -90,10 +91,12 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
return next
}
-type SubexASTCopyRune rune
+type SubexASTCopyRune struct {
+ datum walk.Datum
+}
func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState {
return &SubexCopyRuneState{
- rune: rune(ast),
+ rune: ast.datum,
next: next,
}
}
@@ -153,7 +156,7 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState {
}
type SubexASTRange struct {
- parts map[rune]rune
+ parts map[walk.Datum]walk.Datum
}
func (ast SubexASTRange) compileWith(next SubexState) SubexState {
return &SubexRangeState {
diff --git a/subex/subexstate.go b/subex/subexstate.go
index 2e613e8..9e0d61a 100644
--- a/subex/subexstate.go
+++ b/subex/subexstate.go
@@ -1,35 +1,41 @@
package subex
import (
- "strings"
+ "main/walk"
)
+// A state of execution for the transducer
type SubexState interface {
- eat(store Store, char rune) []SubexBranch
- accepting(store Store) []string
+ // Eat a datum and transition to any number of new states
+ eat(store Store, char walk.Datum) []SubexBranch
+ // Find accepting states reachable through epsilon transitions and return their outputs
+ accepting(store Store) [][]walk.Datum
}
+// Try first, if it fails then try second
type SubexGroupState struct {
first, second SubexState
}
-func (state SubexGroupState) eat(store Store, char rune) []SubexBranch {
+func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch {
otherStore := store.clone()
return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
}
-func (state SubexGroupState) accepting(store Store) []string {
+func (state SubexGroupState) accepting(store Store) [][]walk.Datum {
return append(state.first.accepting(store), state.second.accepting(store)...)
}
+// Run the match machine and store the output in a slot for later use
+// Output nothing
type SubexStoreState struct {
match SubexState
slot rune
next SubexState
- toStore string
+ toStore []walk.Datum
}
-func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) {
+func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) {
acceptedOutputs := state.match.accepting(store)
for _, acceptedOutput := range acceptedOutputs {
- nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+ nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...))
nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...)
}
nextMatchStates := state.match.eat(store.clone(), char)
@@ -39,107 +45,116 @@ func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBran
match: matchState.state,
slot: state.slot,
next: state.next,
- toStore: state.toStore + matchState.output,
+ toStore: append(state.toStore, matchState.output...),
},
- output: "",
+ output: nil,
store: store.clone(),
})
}
return nextStates
}
-func (state SubexStoreState) accepting(store Store) (outputs []string) {
+func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) {
acceptedOutputs := state.match.accepting(store)
for _, acceptedOutput := range acceptedOutputs {
- nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+ nextStore := store.withValue(state.slot, append(state.toStore, acceptedOutput...))
outputs = append(outputs, state.next.accepting(nextStore)...)
}
return outputs
}
+// Don't read in anything, just output the series of data and slots specified
type SubexOutputState struct {
content []TransducerOutput
next SubexState
}
-func (state SubexOutputState) build(store Store) string {
- var builder strings.Builder
+// Given a store, return what is outputted by an epsilon transition from this state
+func (state SubexOutputState) build(store Store) []walk.Datum {
+ var result []walk.Datum
for _, part := range state.content {
- builder.WriteString(part.build(store))
+ result = append(result, part.build(store)...)
}
- return builder.String()
+ return result
}
-func (state SubexOutputState) eat(store Store, char rune) []SubexBranch {
+func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch {
content := state.build(store)
nextStates := state.next.eat(store, char)
for i := range nextStates {
- nextStates[i].output = content + nextStates[i].output
+ nextStates[i].output = append(content, nextStates[i].output...)
}
return nextStates
}
-func (state SubexOutputState) accepting(store Store) []string {
+func (state SubexOutputState) accepting(store Store) [][]walk.Datum {
content := state.build(store)
outputs := state.next.accepting(store)
for i := range outputs {
- outputs[i] = content + outputs[i]
+ outputs[i] = append(content, outputs[i]...)
}
return outputs
}
+// A final state, transitions to nothing but is accepting
type SubexNoneState struct {}
-func (state SubexNoneState) eat(store Store, char rune) []SubexBranch {
+func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch {
return nil
}
-func (state SubexNoneState) accepting(store Store) []string {
- return []string{""}
+func (state SubexNoneState) accepting(store Store) [][]walk.Datum {
+ return [][]walk.Datum{nil}
}
+// Read in a specific datum and output it
+// TODO rename to better reflect datum instead of rune
type SubexCopyRuneState struct {
- rune rune
+ rune walk.Datum
next SubexState
}
-func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch {
+func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch {
+ // TODO can I compare Datum values with == ?
if char == state.rune {
return []SubexBranch{{
state: state.next,
- output: string(char),
+ output: []walk.Datum{char},
store: store,
}}
}
return nil
}
-func (state SubexCopyRuneState) accepting(store Store) []string {
+func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum {
return nil
}
+// Read in any datum and output it
type SubexCopyAnyState struct {
next SubexState
}
-func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch {
+func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch {
return []SubexBranch{{
state: state.next,
- output: string(char),
+ output: []walk.Datum{char},
store: store,
}}
}
-func (state SubexCopyAnyState) accepting(store Store) []string {
+func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum {
return nil
}
+// Read in a datum and apply a map to generate a datum to output
+// If the input isn't in the map transition to nothing
type SubexRangeState struct {
- parts map[rune]rune
+ parts map[walk.Datum]walk.Datum
next SubexState
}
-func (state SubexRangeState) eat(store Store, char rune) []SubexBranch {
+func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch {
out, exists := state.parts[char]
if !exists {
return nil
} else {
return []SubexBranch{{
state: state.next,
- output: string(out),
+ output: []walk.Datum{out},
store: store,
}}
}
}
-func (state SubexRangeState) accepting(store Store) []string {
+func (state SubexRangeState) accepting(store Store) [][]walk.Datum {
return nil
}