From 62b61b1ea2bc8c5e5d447ddc4529b7977439804a Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Sun, 5 Mar 2023 09:08:01 +0000 Subject: Renames walk.Datum to walk.Atom --- subex/main.go | 66 +++++++++++++++++++++++++++-------------------------- subex/parse.go | 36 ++++++++++++++--------------- subex/subexast.go | 27 +++++++++++++++++----- subex/subexstate.go | 65 ++++++++++++++++++++++++++-------------------------- 4 files changed, 105 insertions(+), 89 deletions(-) (limited to 'subex') diff --git a/subex/main.go b/subex/main.go index 138de9a..fd59047 100644 --- a/subex/main.go +++ b/subex/main.go @@ -8,31 +8,33 @@ import ( "strings" ) -// A part of an insertion, either a datum or a slot from which to load +// A part of an insertion, either an Atom or a slot from which to load // TODO rename this type TransducerOutput interface { - // Given the current store, return the []Datum produced by the TransducerOutput - build(Store) []walk.Datum + // Given the current store, return the []Atom produced by the TransducerOutput + build(Store) []walk.Atom } -// A TransducerOutput which is just a datum literal -type TransducerReplacementRune struct { - datum walk.Datum +// A TransducerOutput which is just an Atom literal +type TransducerReplacementAtom struct { + atom walk.Atom } -func (replacement TransducerReplacementRune) build(store Store) []walk.Datum { - return []walk.Datum{replacement.datum} +func (replacement TransducerReplacementAtom) build(store Store) []walk.Atom { + return []walk.Atom{replacement.atom} } +// TODO should be a single field called slot with a type of rune // A TransducerOutput which is a slot that is loaded from type TransducerReplacementLoad struct { - datum walk.Datum + atom walk.Atom } -func (replacement TransducerReplacementLoad) build(store Store) []walk.Datum { - return store[replacement.datum] +func (replacement TransducerReplacementLoad) build(store Store) []walk.Atom { + return store[replacement.atom] } // Where slots are stored -type Store map[walk.Datum][]walk.Datum +// TODO should map from runes as only runes can be slots +type Store map[walk.Atom][]walk.Atom // Return a new store with all the data from this one func (store Store) clone() Store { newStore := make(Store) @@ -42,7 +44,7 @@ func (store Store) clone() Store { return newStore } // Return a copy of this store but with an additional slot set -func (store Store) withValue(key walk.Datum, value []walk.Datum) Store { +func (store Store) withValue(key walk.Atom, value []walk.Atom) Store { newStore := store.clone() newStore[key] = value return newStore @@ -60,17 +62,17 @@ type SubexBranch struct { // State in this branch state SubexState // Output so far in this branch - output []walk.Datum + output []walk.Atom } // Read a single character and return all the branches resulting from this branch consuming it -func (pair SubexBranch) eat(char walk.Datum) []SubexBranch { +func (pair SubexBranch) eat(char walk.Atom) []SubexBranch { states := pair.state.eat(pair.store, char) for i := range states { states[i].output = walk.ConcatData(pair.output, states[i].output) } return states } -func (pair SubexBranch) accepting() [][]walk.Datum { +func (pair SubexBranch) accepting() [][]walk.Atom { return pair.state.accepting(pair.store) } @@ -94,7 +96,7 @@ func pruneStates(states []SubexBranch) (newStates []SubexBranch) { } // Run the subex transducer -func RunTransducer(transducer SubexState, input <-chan walk.Datum) (output []walk.Datum, err bool) { +func RunTransducer(transducer SubexState, input <-chan walk.Atom) (output []walk.Atom, err bool) { states := []SubexBranch{{ state: transducer, output: nil, @@ -129,8 +131,8 @@ func Main() { program := os.Args[1] ast := Parse(program) transducer := CompileTransducer(ast) - pieces := make(chan walk.Datum) - go func(out chan<- walk.Datum, input []walk.WalkValue) { + pieces := make(chan walk.Atom) + go func(out chan<- walk.Atom, input []walk.WalkValue) { for _, value := range input { value.Pieces(out) } @@ -138,21 +140,21 @@ func Main() { }(pieces, tokens) output, err := RunTransducer(transducer, pieces) if !err { - dataIn := make(chan walk.Datum) - go func(out chan<- walk.Datum, in []walk.Datum) { - for _, datum := range in { - out<-datum + dataIn := make(chan walk.Atom) + go func(out chan<- walk.Atom, in []walk.Atom) { + for _, atom := range in { + out<-atom } close(out) }(dataIn, output) valueOut := make(chan walk.WalkValue) - go func(out chan<- walk.WalkValue, in <-chan walk.Datum) { + go func(out chan<- walk.WalkValue, in <-chan walk.Atom) { for { - datum, hasDatum := <-in - if !hasDatum { + atom, hasAtom := <-in + if !hasAtom { break } - switch v := datum.(type) { + switch v := atom.(type) { case walk.TerminalValue: out<-v continue @@ -171,22 +173,22 @@ func Main() { panic("Error! subex output an EndString before BeginString") case walk.StartString: default: - panic("Unknown datum type") + panic("Unknown atom type") } // Handle string start var builder strings.Builder loop: for { - datum, hasDatum := <-in - if !hasDatum { + atom, hasAtom := <-in + if !hasAtom { panic("Missing EndString") } - switch v := datum.(type) { + switch v := atom.(type) { case walk.EndString: break loop case rune: builder.WriteRune(v) default: - panic("Invalid datum in string") + panic("Invalid atom in string") } } out<-walk.ValueString(builder.String()) diff --git a/subex/parse.go b/subex/parse.go index 59b784d..1d64c20 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -4,7 +4,7 @@ import ( "main/walk" ) -func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Datum { +func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { switch l.next() { case '(': return ifLeft @@ -15,8 +15,8 @@ func expectBracket(l *RuneReader, ifLeft walk.Datum, ifRight walk.Datum) walk.Da } } -// Having just read termType, read in a bracket and return the corresponding walk.Datum -func parseTerminatorDatumLiteral(termType rune, l *RuneReader) walk.Datum { +// Having just read termType, read in a bracket and return the corresponding Atom +func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom { switch termType { case '@': return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd) @@ -43,21 +43,21 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) { if slot == eof { panic("Missing slot character") } - output = append(output, TransducerReplacementLoad{datum: slot}) + output = append(output, TransducerReplacementLoad{atom: slot}) case '@', '~', '#': - output = append(output, TransducerReplacementRune{datum: parseTerminatorDatumLiteral(r, l)}) + output = append(output, TransducerReplacementAtom{atom: parseTerminatorAtomLiteral(r, l)}) default: - output = append(output, TransducerReplacementRune{datum: r}) + output = append(output, TransducerReplacementAtom{atom: r}) } } return output } // Parse the contents of a range subex [] into a map -func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { +func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom { // TODO escaping - parts := make(map[walk.Datum]walk.Datum) - var froms []walk.Datum + parts := make(map[walk.Atom]walk.Atom) + var froms []walk.Atom var hasTo bool for { fromsStart := l.next() @@ -68,9 +68,9 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { hasTo = true break } else { - datum := parseTerminatorDatumLiteral(fromsStart, l) - if datum != nil { - froms = append(froms, datum) + atom := parseTerminatorAtomLiteral(fromsStart, l) + if atom != nil { + froms = append(froms, atom) continue } } @@ -91,16 +91,16 @@ func parseRangeSubex(l *RuneReader) map[walk.Datum]walk.Datum { panic("Missing from part of range expression") } - var tos []walk.Datum + var tos []walk.Atom if hasTo { for { tosStart := l.next() if tosStart == ']' { break } else { - datum := parseTerminatorDatumLiteral(tosStart, l) - if datum != nil { - tos = append(tos, datum) + atom := parseTerminatorAtomLiteral(tosStart, l) + if atom != nil { + tos = append(tos, atom) continue } } @@ -166,9 +166,9 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { case '.': lhs = SubexASTCopyAny{} case '@', '#', '~': - lhs = SubexASTCopyRune{datum: parseTerminatorDatumLiteral(r, l)} + lhs = SubexASTCopyAtom{atom: parseTerminatorAtomLiteral(r, l)} default: - lhs = SubexASTCopyRune{datum: r} + lhs = SubexASTCopyAtom{atom: r} } loop: for { if minPower <= 0 { diff --git a/subex/subexast.go b/subex/subexast.go index 0afd3e3..c49f215 100644 --- a/subex/subexast.go +++ b/subex/subexast.go @@ -5,10 +5,12 @@ import ( "main/walk" ) +// A node in the AST of a subex type SubexAST interface { compileWith(next SubexState) SubexState } +// Process the first subex, then the second, splitting the input text in two type SubexASTConcat struct { first, second SubexAST } @@ -19,6 +21,7 @@ func (ast SubexASTConcat) String() string { return fmt.Sprintf("(%v)(%v)", ast.first, ast.second) } +// Processing a subex and storing the output in a slot instead of outputting it type SubexASTStore struct { match SubexAST slot rune @@ -34,6 +37,7 @@ func (ast SubexASTStore) String() string { return fmt.Sprintf("$%c(%v)", ast.slot, ast.match) } +// Try to run the first subex, if it fails then backtrack and use the second type SubexASTOr struct { first, second SubexAST } @@ -44,6 +48,7 @@ func (ast SubexASTOr) compileWith(next SubexState) SubexState { } } +// Run the content subex as many times as possible as the input is read in type SubexASTMaximise struct { content SubexAST } @@ -59,6 +64,7 @@ func (ast SubexASTMaximise) String() string { return fmt.Sprintf("(%v)*", ast.content) } +// Run the content subex as few times as possible as the input is read in type SubexASTMinimise struct { content SubexAST } @@ -74,6 +80,7 @@ func (ast SubexASTMinimise) String() string { return fmt.Sprintf("(%v)-", ast.content) } +// Run the subex as many times as possible but at least min times and at most max times type SubexASTRepeat struct { content SubexAST min, max int @@ -91,16 +98,18 @@ func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { return next } -type SubexASTCopyRune struct { - datum walk.Datum +// Read in a single specific Atom and output it unchanged +type SubexASTCopyAtom struct { + atom walk.Atom } -func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { - return &SubexCopyRuneState{ - rune: ast.datum, +func (ast SubexASTCopyAtom) compileWith(next SubexState) SubexState { + return &SubexCopyAtomState{ + atom: ast.atom, next: next, } } +// Read in any single Atom and output it unchanged type SubexASTCopyAny struct {} func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState { return &SubexCopyAnyState{next} @@ -109,6 +118,7 @@ func (ast SubexASTCopyAny) String() string { return "." } +// Output a series of Atoms without reading anything from input type SubexASTOutput struct { replacement []TransducerOutput } @@ -119,6 +129,7 @@ func (ast SubexASTOutput) compileWith(next SubexState) SubexState { } } +// Try to use a subex but just skip over this if it doesn't match type SubexASTTry struct { content SubexAST } @@ -129,6 +140,7 @@ func (ast SubexASTTry) compileWith(next SubexState) SubexState { } } +// Try to skip over this subex but use it should that not match type SubexASTMaybe struct { content SubexAST } @@ -139,6 +151,7 @@ func (ast SubexASTMaybe) compileWith(next SubexState) SubexState { } } +// Read in a repeated subex separated by a delimiter. Greedy type SubexASTJoin struct { content, delimiter SubexAST } @@ -155,8 +168,10 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState { } } +// Run each input Atom through a map to produce an output Atom +// Atoms not in the map cause this to not match type SubexASTRange struct { - parts map[walk.Datum]walk.Datum + parts map[walk.Atom]walk.Atom } func (ast SubexASTRange) compileWith(next SubexState) SubexState { return &SubexRangeState { diff --git a/subex/subexstate.go b/subex/subexstate.go index 415714f..12ace42 100644 --- a/subex/subexstate.go +++ b/subex/subexstate.go @@ -6,21 +6,21 @@ import ( // A state of execution for the transducer type SubexState interface { - // Eat a datum and transition to any number of new states - eat(store Store, char walk.Datum) []SubexBranch + // Eat a Atom and transition to any number of new states + eat(store Store, char walk.Atom) []SubexBranch // Find accepting states reachable through epsilon transitions and return their outputs - accepting(store Store) [][]walk.Datum + accepting(store Store) [][]walk.Atom } // Try first, if it fails then try second type SubexGroupState struct { first, second SubexState } -func (state SubexGroupState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexGroupState) eat(store Store, char walk.Atom) []SubexBranch { otherStore := store.clone() return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) } -func (state SubexGroupState) accepting(store Store) [][]walk.Datum { +func (state SubexGroupState) accepting(store Store) [][]walk.Atom { return append(state.first.accepting(store), state.second.accepting(store)...) } @@ -30,9 +30,9 @@ type SubexStoreState struct { match SubexState slot rune next SubexState - toStore []walk.Datum + toStore []walk.Atom } -func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []SubexBranch) { +func (state SubexStoreState) eat(store Store, char walk.Atom) (nextStates []SubexBranch) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput)) @@ -53,7 +53,7 @@ func (state SubexStoreState) eat(store Store, char walk.Datum) (nextStates []Sub } return nextStates } -func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Datum) { +func (state SubexStoreState) accepting(store Store) (outputs [][]walk.Atom) { acceptedOutputs := state.match.accepting(store) for _, acceptedOutput := range acceptedOutputs { nextStore := store.withValue(state.slot, walk.ConcatData(state.toStore, acceptedOutput)) @@ -68,14 +68,14 @@ type SubexOutputState struct { next SubexState } // Given a store, return what is outputted by an epsilon transition from this state -func (state SubexOutputState) build(store Store) []walk.Datum { - var result []walk.Datum +func (state SubexOutputState) build(store Store) []walk.Atom { + var result []walk.Atom for _, part := range state.content { result = append(result, part.build(store)...) } return result } -func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexOutputState) eat(store Store, char walk.Atom) []SubexBranch { content := state.build(store) nextStates := state.next.eat(store, char) for i := range nextStates { @@ -83,7 +83,7 @@ func (state SubexOutputState) eat(store Store, char walk.Datum) []SubexBranch { } return nextStates } -func (state SubexOutputState) accepting(store Store) [][]walk.Datum { +func (state SubexOutputState) accepting(store Store) [][]walk.Atom { content := state.build(store) outputs := state.next.accepting(store) for i := range outputs { @@ -94,67 +94,66 @@ func (state SubexOutputState) accepting(store Store) [][]walk.Datum { // A final state, transitions to nothing but is accepting type SubexNoneState struct {} -func (state SubexNoneState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexNoneState) eat(store Store, char walk.Atom) []SubexBranch { return nil } -func (state SubexNoneState) accepting(store Store) [][]walk.Datum { - return [][]walk.Datum{nil} +func (state SubexNoneState) accepting(store Store) [][]walk.Atom { + return [][]walk.Atom{nil} } -// Read in a specific datum and output it -// TODO rename to better reflect datum instead of rune -type SubexCopyRuneState struct { - rune walk.Datum +// Read in a specific Atom and output it +type SubexCopyAtomState struct { + atom walk.Atom next SubexState } -func (state SubexCopyRuneState) eat(store Store, char walk.Datum) []SubexBranch { - // TODO can I compare Datum values with == ? - if char == state.rune { +func (state SubexCopyAtomState) eat(store Store, char walk.Atom) []SubexBranch { + // TODO can I compare Atom values with == ? + if char == state.atom { return []SubexBranch{{ state: state.next, - output: []walk.Datum{char}, + output: []walk.Atom{char}, store: store, }} } return nil } -func (state SubexCopyRuneState) accepting(store Store) [][]walk.Datum { +func (state SubexCopyAtomState) accepting(store Store) [][]walk.Atom { return nil } -// Read in any datum and output it +// Read in any Atom and output it type SubexCopyAnyState struct { next SubexState } -func (state SubexCopyAnyState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexCopyAnyState) eat(store Store, char walk.Atom) []SubexBranch { return []SubexBranch{{ state: state.next, - output: []walk.Datum{char}, + output: []walk.Atom{char}, store: store, }} } -func (state SubexCopyAnyState) accepting(store Store) [][]walk.Datum { +func (state SubexCopyAnyState) accepting(store Store) [][]walk.Atom { return nil } -// Read in a datum and apply a map to generate a datum to output +// Read in an Atom and apply a map to generate an Atom to output // If the input isn't in the map transition to nothing type SubexRangeState struct { - parts map[walk.Datum]walk.Datum + parts map[walk.Atom]walk.Atom next SubexState } -func (state SubexRangeState) eat(store Store, char walk.Datum) []SubexBranch { +func (state SubexRangeState) eat(store Store, char walk.Atom) []SubexBranch { out, exists := state.parts[char] if !exists { return nil } else { return []SubexBranch{{ state: state.next, - output: []walk.Datum{out}, + output: []walk.Atom{out}, store: store, }} } } -func (state SubexRangeState) accepting(store Store) [][]walk.Datum { +func (state SubexRangeState) accepting(store Store) [][]walk.Atom { return nil } -- cgit v1.2.3