<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
path: root/subex
diff options
context:
space:
mode:
Diffstat (limited to 'subex')
-rw-r--r--subex/main.go66
-rw-r--r--subex/parse.go36
-rw-r--r--subex/subexast.go27
-rw-r--r--subex/subexstate.go65
4 files changed, 105 insertions, 89 deletions
diff --git a/subex/main.go b/subex/main.go
index 138de9a..fd59047 100644
--- a/subex/main.go
+++ b/subex/main.go
@@ -8,31 +8,33 @@ import (
"strings"
)
-// A part of an insertion, either a datum or a slot from which to load
+// A part of an insertion, either an Atom or a slot from which to load
// TODO rename this
type TransducerOutput interface {
- // Given the current store, return the []Datum produced by the TransducerOutput
- build(Store) []walk.Datum
+ // Given the current store, return the []Atom produced by the TransducerOutput
+ build(Store) []walk.Atom
}
-// A TransducerOutput which is just a datum literal
-type TransducerReplacementRune struct {
- datum walk.Datum
+// A TransducerOutput which is just an Atom literal
+type TransducerReplacementAtom struct {
+ atom walk.Atom
}
-func (replacement TransducerReplacementRune) build(store Store) []walk.Datum {
- return []walk.Datum{replacement.datum}
+func (replacement TransducerReplacementAtom) build(store Store) []walk.Atom {
+ return []walk.Atom{replacement.atom}
}
+// TODO should be a single field called slot with a type of rune
// A TransducerOutput which is a slot that is loaded from
type TransducerReplacementLoad struct {
- datum walk.Datum
+ atom walk.Atom
}
-func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum {
- return store[replacement.datum]
+func (replacement TransducerReplacementLoad) build(store Store) []walk.Atom {
+ return store[replacement.atom]
}
// Where slots are stored
-type Store map[walk.Datum][]walk.Datum
+// TODO should map from runes as only runes can be slots
+type Store map[walk.Atom][]walk.Atom
// Return a new store with all the data from this one
func (store Store) clone() Store {
newStore := make(Store)
@@ -42,7 +44,7 @@ func (store Store) clone() Store {
return newStore
}
// Return a copy of this store but with an additional slot set
-func (store Store) withValue(key walk.Datum, value []walk.Datum) Store {
+func (store Store) withValue(key walk.Atom, value []walk.Atom) Store {
newStore := store.clone()
newStore[key] = value
return newStore
@@ -60,17 +62,17 @@ type SubexBranch struct {
// State in this branch
state SubexState
// Output so far in this branch
- output []walk.Datum
+ output []walk.Atom
}
// Read a single character and return all the branches resulting from this branch consuming it
-func (pair SubexBranch) eat(char walk.Datum) []SubexBranch {
+func (pair SubexBranch) eat(char walk.Atom) []SubexBranch {
states := pair.state.eat(pair.store, char)
for i := range states {
states[i].output = walk.ConcatData(pair.output, states[i].output)
}
return states
}
-func (pair SubexBranch) accepting() [][]walk.Datum {
+func (pair SubexBranch) accepting() [][]walk.Atom {
return pair.state.accepting(pair.store)
}
@@ -94,7 +96,7 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) {
}
// Run the subex transducer
-func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) {
+func RunTransducer(transducer SubexState, input <-chan walk.Atom) (output []walk.Atom, err bool) {
states := []SubexBranch{{
state: transducer,
output: nil,
@@ -129,8 +131,8 @@ func Main() {
program := os.Args[1]
ast := Parse(program)
transducer := CompileTransducer(ast)
- pieces := make(chan walk.Datum)
- go func(out chan<- walk.Datum, input []walk.WalkValue) {
+ pieces := make(chan walk.Atom)
+ go func(out chan<- walk.Atom, input []walk.WalkValue) {
for _, value := range input {
value.Pieces(out)
}
@@ -138,21 +140,21 @@ func Main() {
}(pieces, tokens)
output, err := RunTransducer(transducer, pieces)
if !err {
- dataIn := make(chan walk.Datum)
- go func(out chan<- walk.Datum, in []walk.Datum) {
- for _, datum := range in {
- out<-datum
+ dataIn := make(chan walk.Atom)
+ go func(out chan<- walk.Atom, in []walk.Atom) {
+ for _, atom := range in {
+ out<-atom
}
close(out)
}(dataIn, output)
valueOut := make(chan walk.WalkValue)
- go func(out chan<- walk.WalkValue, in <-chan walk.Datum) {
+ go func(out chan<- walk.WalkValue, in <-chan walk.Atom) {
for {
- datum, hasDatum := <-in
- if !hasDatum {
+ atom, hasAtom := <-in
+ if !hasAtom {
break
}
- switch v := datum.(type) {
+ switch v := atom.(type) {
case walk.TerminalValue:
out<-v
continue
@@ -171,22 +173,22 @@ func Main() {
panic("Error! subex output an EndString before BeginString")
case walk.StartString:
default:
- panic("Unknown datum type")
+ panic("Unknown atom type")
}
// Handle string start
var builder strings.Builder
loop: for {
- datum, hasDatum := <-in
- if !hasDatum {
+ atom, hasAtom := <-in
+ if !hasAtom {
panic("Missing EndString")
}
- switch v := datum.(type) {
+ switch v := atom.(type) {
case walk.EndString:
break loop
case rune:
builder.WriteRune(v)
default:
- panic("Invalid datum in string")
+ panic("Invalid atom in string")
}
}
out<-walk.ValueString(builder.String())
diff --git a/subex/parse.go b/subex/parse.go
index 59b784d..1d64c20 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -4,7 +4,7 @@ import (
"main/walk"
)
-func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Datum {
+func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
switch l.next() {
case '(':
return ifLeft
@@ -15,8 +15,8 @@ func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Da
}
}
-// Having just read termType, read in a bracket and return the corresponding walk.Datum
-func parseTerminatorDatumLiteral(termType rune, l *RuneReader) walk.Datum {
+// Having just read termType, read in a bracket and return the corresponding Atom
+func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {
switch termType {
case '@':
return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd)
@@ -43,21 +43,21 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) {
if slot == eof {
panic("Missing slot character")
}
- output = append(output, TransducerReplacementLoad{datum: slot})
+ output = append(output, TransducerReplacementLoad{atom: slot})
case '@', '~', '#':
- output = append(output, TransducerReplacementRune{datum: parseTerminatorDatumLiteral(r, l)})
+ output = append(output, TransducerReplacementAtom{atom: parseTerminatorAtomLiteral(r, l)})
default:
- output = append(output, TransducerReplacementRune{datum: r})
+ output = append(output, TransducerReplacementAtom{atom: r})
}
}
return output
}
// Parse the contents of a range subex [] into a map
-func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {
+func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
// TODO escaping
- parts := make(map[walk.Datum]walk.Datum)
- var froms []walk.Datum
+ parts := make(map[walk.Atom]walk.Atom)
+ var froms []walk.Atom
var hasTo bool
for {
fromsStart := l.next()
@@ -68,9 +68,9 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {
hasTo = true
break
} else {
- datum := parseTerminatorDatumLiteral(fromsStart, l)
- if datum != nil {
- froms = append(froms, datum)
+ atom := parseTerminatorAtomLiteral(fromsStart, l)
+ if atom != nil {
+ froms = append(froms, atom)
continue
}
}
@@ -91,16 +91,16 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum {
panic("Missing from part of range expression")
}
- var tos []walk.Datum
+ var tos []walk.Atom
if hasTo {
for {
tosStart := l.next()
if tosStart == ']' {
break
} else {
- datum := parseTerminatorDatumLiteral(tosStart, l)
- if datum != nil {
- tos = append(tos, datum)
+ atom := parseTerminatorAtomLiteral(tosStart, l)
+ if atom != nil {
+ tos = append(tos, atom)
continue
}
}
@@ -166,9 +166,9 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
case '.':
lhs = SubexASTCopyAny{}
case '@', '#', '~':
- lhs = SubexASTCopyRune{datum: parseTerminatorDatumLiteral(r, l)}
+ lhs = SubexASTCopyAtom{atom: parseTerminatorAtomLiteral(r, l)}
default:
- lhs = SubexASTCopyRune{datum: r}
+ lhs = SubexASTCopyAtom{atom: r}
}
loop: for {
if minPower <= 0 {
diff --git a/subex/subexast.go b/subex/subexast.go
index 0afd3e3..c49f215 100644
--- a/subex/subexast.go
+++ b/subex/subexast.go
@@ -5,10 +5,12 @@ import (
"main/walk"
)
+// A node in the AST of a subex
type SubexAST interface {
compileWith(next SubexState) SubexState
}
+// Process the first subex, then the second, splitting the input text in two
type SubexASTConcat struct {
first, second SubexAST
}
@@ -19,6 +21,7 @@ func (ast SubexASTConcat) String() string {
return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)
}
+// Processing a subex and storing the output in a slot instead of outputting it
type SubexASTStore struct {
match SubexAST
slot rune
@@ -34,6 +37,7 @@ func (ast SubexASTStore) String() string {
return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)
}
+// Try to run the first subex, if it fails then backtrack and use the second
type SubexASTOr struct {
first, second SubexAST
}
@@ -44,6 +48,7 @@ func (ast SubexASTOr) compileWith(next SubexState) SubexState {
}
}
+// Run the content subex as many times as possible as the input is read in
type SubexASTMaximise struct {
content SubexAST
}
@@ -59,6 +64,7 @@ func (ast SubexASTMaximise) String() string {
return fmt.Sprintf("(%v)*", ast.content)
}
+// Run the content subex as few times as possible as the input is read in
type SubexASTMinimise struct {
content SubexAST
}
@@ -74,6 +80,7 @@ func (ast SubexASTMinimise) String() string {
return fmt.Sprintf("(%v)-", ast.content)
}
+// Run the subex as many times as possible but at least min times and at most max times
type SubexASTRepeat struct {
content SubexAST
min, max int
@@ -91,16 +98,18 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
return next
}
-type SubexASTCopyRune struct {
- datum walk.Datum
+// Read in a single specific Atom and output it unchanged
+type SubexASTCopyAtom struct {
+ atom walk.Atom
}
-func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState {
- return &SubexCopyRuneState{
- rune: ast.datum,
+func (ast SubexASTCopyAtom) compileWith(next SubexState) SubexState {
+ return &SubexCopyAtomState{
+ atom: ast.atom,
next: next,
}
}
+// Read in any single Atom and output it unchanged
type SubexASTCopyAny struct {}
func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState {
return &SubexCopyAnyState{next}
@@ -109,6 +118,7 @@ func (ast SubexASTCopyAny) String() string {
return "."
}
+// Output a series of Atoms without reading anything from input
type SubexASTOutput struct {
replacement []TransducerOutput
}
@@ -119,6 +129,7 @@ func (ast SubexASTOutput) compileWith(next SubexState) SubexState {
}
}
+// Try to use a subex but just skip over this if it doesn't match
type SubexASTTry struct {
content SubexAST
}
@@ -129,6 +140,7 @@ func (ast SubexASTTry) compileWith(next SubexState) SubexState {
}
}
+// Try to skip over this subex but use it should that not match
type SubexASTMaybe struct {
content SubexAST
}
@@ -139,6 +151,7 @@ func (ast SubexASTMaybe) compileWith(next SubexState) SubexState {
}
}
+// Read in a repeated subex separated by a delimiter. Greedy
type SubexASTJoin struct {
content, delimiter SubexAST
}
@@ -155,8 +168,10 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState {
}
}
+// Run each input Atom through a map to produce an output Atom
+// Atoms not in the map cause this to not match
type SubexASTRange struct {
- parts map[walk.Datum]walk.Datum
+ parts map[walk.Atom]walk.Atom
}
func (ast SubexASTRange) compileWith(next SubexState) SubexState {
return &SubexRangeState {
diff --git a/subex/subexstate.go b/subex/subexstate.go
index 415714f..12ace42 100644
--- a/subex/subexstate.go
+++ b/subex/subexstate.go
@@ -6,21 +6,21 @@ import (
// A state of execution for the transducer
type SubexState interface {
- // Eat a datum and transition to any number of new states
- eat(store Store, char walk.Datum) []SubexBranch
+ // Eat a Atom and transition to any number of new states
+ eat(store Store, char walk.Atom) []SubexBranch
// Find accepting states reachable through epsilon transitions and return their outputs
- accepting(store Store) [][]walk.Datum
+ accepting(store Store) [][]walk.Atom
}
// Try first, if it fails then try second
type SubexGroupState struct {
first, second SubexState
}
-func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch {
+func (state SubexGroupState) eat(store Store, char walk.Atom) []SubexBranch {
otherStore := store.clone()
return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
}
-func (state SubexGroupState) accepting(store Store) [][]walk.Datum {
+func (state SubexGroupState) accepting(store Store) [][]walk.Atom {
return append(state.first.accepting(store), state.second.accepting(store)...)
}
@@ -30,9 +30,9 @@ type SubexStoreState struct {
match SubexState
slot rune
next SubexState
- toStore []walk.Datum
+ toStore []walk.Atom
}
-func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) {
+func (state SubexStoreState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) {
acceptedOutputs := state.match.accepting(store)
for _, acceptedOutput := range acceptedOutputs {
nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput))
@@ -53,7 +53,7 @@ func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []Sub
}
return nextStates
}
-func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) {
+func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Atom) {
acceptedOutputs := state.match.accepting(store)
for _, acceptedOutput := range acceptedOutputs {
nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput))
@@ -68,14 +68,14 @@ type SubexOutputState struct {
next SubexState
}
// Given a store, return what is outputted by an epsilon transition from this state
-func (state SubexOutputState) build(store Store) []walk.Datum {
- var result []walk.Datum
+func (state SubexOutputState) build(store Store) []walk.Atom {
+ var result []walk.Atom
for _, part := range state.content {
result = append(result, part.build(store)...)
}
return result
}
-func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch {
+func (state SubexOutputState) eat(store Store, char walk.Atom) []SubexBranch {
content := state.build(store)
nextStates := state.next.eat(store, char)
for i := range nextStates {
@@ -83,7 +83,7 @@ func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch {
}
return nextStates
}
-func (state SubexOutputState) accepting(store Store) [][]walk.Datum {
+func (state SubexOutputState) accepting(store Store) [][]walk.Atom {
content := state.build(store)
outputs := state.next.accepting(store)
for i := range outputs {
@@ -94,67 +94,66 @@ func (state SubexOutputState) accepting(store Store) [][]walk.Datum {
// A final state, transitions to nothing but is accepting
type SubexNoneState struct {}
-func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch {
+func (state SubexNoneState) eat(store Store, char walk.Atom) []SubexBranch {
return nil
}
-func (state SubexNoneState) accepting(store Store) [][]walk.Datum {
- return [][]walk.Datum{nil}
+func (state SubexNoneState) accepting(store Store) [][]walk.Atom {
+ return [][]walk.Atom{nil}
}
-// Read in a specific datum and output it
-// TODO rename to better reflect datum instead of rune
-type SubexCopyRuneState struct {
- rune walk.Datum
+// Read in a specific Atom and output it
+type SubexCopyAtomState struct {
+ atom walk.Atom
next SubexState
}
-func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch {
- // TODO can I compare Datum values with == ?
- if char == state.rune {
+func (state SubexCopyAtomState) eat(store Store, char walk.Atom) []SubexBranch {
+ // TODO can I compare Atom values with == ?
+ if char == state.atom {
return []SubexBranch{{
state: state.next,
- output: []walk.Datum{char},
+ output: []walk.Atom{char},
store: store,
}}
}
return nil
}
-func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum {
+func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom {
return nil
}
-// Read in any datum and output it
+// Read in any Atom and output it
type SubexCopyAnyState struct {
next SubexState
}
-func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch {
+func (state SubexCopyAnyState) eat(store Store, char walk.Atom) []SubexBranch {
return []SubexBranch{{
state: state.next,
- output: []walk.Datum{char},
+ output: []walk.Atom{char},
store: store,
}}
}
-func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum {
+func (state SubexCopyAnyState) accepting(store Store) [][]walk.Atom {
return nil
}
-// Read in a datum and apply a map to generate a datum to output
+// Read in an Atom and apply a map to generate an Atom to output
// If the input isn't in the map transition to nothing
type SubexRangeState struct {
- parts map[walk.Datum]walk.Datum
+ parts map[walk.Atom]walk.Atom
next SubexState
}
-func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch {
+func (state SubexRangeState) eat(store Store, char walk.Atom) []SubexBranch {
out, exists := state.parts[char]
if !exists {
return nil
} else {
return []SubexBranch{{
state: state.next,
- output: []walk.Datum{out},
+ output: []walk.Atom{out},
store: store,
}}
}
}
-func (state SubexRangeState) accepting(store Store) [][]walk.Datum {
+func (state SubexRangeState) accepting(store Store) [][]walk.Atom {
return nil
}