<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Stanton <charlie@shtanton.xyz>2023-02-19 08:59:16 +0000
committerCharlie Stanton <charlie@shtanton.xyz>2023-02-19 08:59:16 +0000
commitfba426b3910f16c8abc6f819da3138f03e5f0b1a (patch)
tree9ce7473194a7ac4d97278cff3e95e58fd3277c72
parent3636825c64bb6c172b0858d7a08c30acfcd68bdd (diff)
downloadstred-go-fba426b3910f16c8abc6f819da3138f03e5f0b1a.tar
Introduces subex processing
Doesn't integrate it at all yet
-rw-r--r--main/main.go9
-rw-r--r--subex/lex.go34
-rw-r--r--subex/main.go114
-rw-r--r--subex/parse.go175
-rw-r--r--subex/subexast.go163
-rw-r--r--subex/subexstate.go145
6 files changed, 636 insertions, 4 deletions
diff --git a/main/main.go b/main/main.go
index 08514ab..3a701ed 100644
--- a/main/main.go
+++ b/main/main.go
@@ -6,6 +6,7 @@ import (
"fmt"
"strings"
"unicode/utf8"
+ "main/subex"
)
type PathSegment interface {}
@@ -619,13 +620,13 @@ func runTransducer(transducer TransducerState, input string) (output string, err
func main() {
if len(os.Args) != 3 {
- panic("Expected: program [input] [subex]")
+ panic("Expected: stred [input] [subex]")
}
input := os.Args[1]
program := os.Args[2]
- ast := parse(program)
- transducer := compileTransducer(ast)
- output, err := runTransducer(transducer, input)
+ ast := subex.Parse(program)
+ transducer := subex.CompileTransducer(ast)
+ output, err := subex.RunTransducer(transducer, input)
if err {
output = input
}
diff --git a/subex/lex.go b/subex/lex.go
new file mode 100644
index 0000000..f020b23
--- /dev/null
+++ b/subex/lex.go
@@ -0,0 +1,34 @@
+package subex
+
+import (
+ "unicode/utf8"
+)
+
+const eof rune = -1
+type RuneReader struct {
+ input string
+ pos, width int
+}
+func (l *RuneReader) next() rune {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return eof
+ }
+ var r rune
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pos += l.width
+ return r
+}
+func (l *RuneReader) accept(chars string) bool {
+ r := l.next()
+ for _, char := range chars {
+ if char == r {
+ return true
+ }
+ }
+ l.rewind()
+ return false
+}
+func (l *RuneReader) rewind() {
+ l.pos -= l.width
+}
diff --git a/subex/main.go b/subex/main.go
new file mode 100644
index 0000000..3ae0618
--- /dev/null
+++ b/subex/main.go
@@ -0,0 +1,114 @@
+package subex
+
+import (
+ "os"
+ "fmt"
+ "io"
+)
+
+type TransducerOutput interface {
+ build(Store) string
+}
+
+type TransducerReplacementRune rune
+func (replacement TransducerReplacementRune) build(store Store) string {
+ return string(replacement)
+}
+
+type TransducerReplacementLoad rune
+func (replacement TransducerReplacementLoad) build(store Store) string {
+ return store[rune(replacement)]
+}
+
+type Store map[rune]string
+func (store Store) clone() Store {
+ newStore := make(Store)
+ for key, val := range store {
+ newStore[key] = val
+ }
+ return newStore
+}
+func (store Store) withValue(key rune, value string) Store {
+ newStore := store.clone()
+ newStore[key] = value
+ return newStore
+}
+
+func CompileTransducer(transducerAst SubexAST) SubexState {
+ return transducerAst.compileWith(SubexNoneState{})
+}
+
+type SubexBranch struct {
+ store Store
+ state SubexState
+ output string
+}
+func (pair SubexBranch) eat(char rune) []SubexBranch {
+ states := pair.state.eat(pair.store, char)
+ for i := range states {
+ states[i].output = pair.output + states[i].output
+ }
+ return states
+}
+func (pair SubexBranch) accepting() []string {
+ return pair.state.accepting(pair.store)
+}
+
+func equalStates(left SubexBranch, right SubexBranch) bool {
+ // Only care about if they are the same pointer
+ return left.state == right.state
+}
+
+func pruneStates(states []SubexBranch) (newStates []SubexBranch) {
+ outer: for _, state := range states {
+ for _, newState := range newStates {
+ if equalStates(state, newState) {
+ continue outer
+ }
+ }
+ newStates = append(newStates, state)
+ }
+ return newStates
+}
+
+func RunTransducer(transducer SubexState, input string) (output string, err bool) {
+ states := []SubexBranch{{
+ state: transducer,
+ output: "",
+ store: make(Store),
+ }}
+ for _, char := range input {
+ fmt.Printf("Running with %d states\n", len(states))
+ var newStates []SubexBranch
+ for _, state := range states {
+ newStates = append(newStates, state.eat(char)...)
+ }
+ states = pruneStates(newStates)
+ }
+ for _, state := range states {
+ outputEnds := state.accepting()
+ for _, outputEnd := range outputEnds {
+ return state.output + outputEnd, false
+ }
+ }
+ return "", true
+}
+
+func Main() {
+ if len(os.Args) != 2 {
+ panic("Expected: program [subex]")
+ }
+ inputBytes, inputErr := io.ReadAll(os.Stdin)
+ input := string(inputBytes)
+ if inputErr != nil {
+ fmt.Println("Error reading")
+ }
+ program := os.Args[1]
+ ast := Parse(program)
+ transducer := CompileTransducer(ast)
+ output, err := RunTransducer(transducer, input)
+ if err {
+ output = input
+ }
+ fmt.Print(output)
+}
diff --git a/subex/parse.go b/subex/parse.go
new file mode 100644
index 0000000..af575eb
--- /dev/null
+++ b/subex/parse.go
@@ -0,0 +1,175 @@
+package subex
+
+func parseReplacement(l *RuneReader) (output []TransducerOutput) {
+ loop: for {
+ r := l.next()
+ switch r {
+ case eof:
+ panic("Missing closing \"")
+ case '"':
+ break loop
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ output = append(output, TransducerReplacementLoad(slot))
+ default:
+ output = append(output, TransducerReplacementRune(r))
+ }
+ }
+ return output
+}
+
+func parseRangeSubex(l *RuneReader) map[rune]rune {
+ parts := make(map[rune]rune)
+ var froms []rune
+ var hasTo bool
+ for {
+ fromsStart := l.next()
+ if fromsStart == ']' {
+ hasTo = false
+ break
+ } else if fromsStart == '=' {
+ hasTo = true
+ break
+ }
+ var fromsEnd rune
+ if l.accept("-") {
+ fromsEnd = l.next()
+ if fromsEnd == ']' || fromsEnd == '=' {
+ l.rewind()
+ fromsEnd = fromsStart
+ }
+ } else {
+ fromsEnd = fromsStart
+ }
+ for i := fromsStart; i <= fromsEnd; i += 1 {
+ froms = append(froms, i)
+ }
+ }
+ if len(froms) == 0 {
+ panic("Missing from part of range expression")
+ }
+
+ var tos []rune
+ if hasTo {
+ for {
+ tosStart := l.next()
+ if tosStart == ']' {
+ break
+ }
+ var tosEnd rune
+ if l.accept("-") {
+ tosEnd = l.next()
+ if tosEnd == ']' {
+ l.rewind()
+ tosEnd = tosStart
+ }
+ } else {
+ tosEnd = tosStart
+ }
+ for i := tosStart; i <= tosEnd; i += 1 {
+ tos = append(tos, i)
+ }
+ }
+ } else {
+ tos = froms
+ }
+ if len(tos) == 0 {
+ panic("Missing to part of range expression")
+ }
+
+ for i, from := range froms {
+ parts[from] = tos[i % len(tos)]
+ }
+ return parts
+}
+
+func parseSubex(l *RuneReader, minPower int) SubexAST {
+ var lhs SubexAST
+ r := l.next()
+ switch r {
+ case eof:
+ return nil
+ case '(':
+ lhs = parseSubex(l, 0)
+ if !l.accept(")") {
+ panic("Missing matching )")
+ }
+ case '[':
+ rangeParts := parseRangeSubex(l)
+ lhs = SubexASTRange {rangeParts}
+ case ')', '*', '-', '|', '!', '?', ';':
+ l.rewind()
+ return nil
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ match := parseSubex(l, 100)
+ if match == nil {
+ panic("Missing regex for store")
+ }
+ lhs = SubexASTStore{
+ match: match,
+ slot: slot,
+ }
+ case '"':
+ replacement := parseReplacement(l)
+ lhs = SubexASTOutput{replacement}
+ case '.':
+ lhs = SubexASTCopyAny{}
+ default:
+ lhs = SubexASTCopyRune(r)
+ }
+ loop: for {
+ if minPower <= 0 {
+ next := parseSubex(l, 1)
+ if next != nil {
+ lhs = SubexASTConcat{lhs, next}
+ continue loop
+ }
+ }
+ r := l.next()
+ switch {
+ case r == '*' && minPower <= 8:
+ lhs = SubexASTMaximise{lhs}
+ case r == '-' && minPower <= 8:
+ lhs = SubexASTMinimise{lhs}
+ case r == '!' && minPower <= 8:
+ lhs = SubexASTTry{lhs}
+ case r == '?' && minPower <= 8:
+ lhs = SubexASTMaybe{lhs}
+ case r == '|' && minPower <= 4:
+ rhs := parseSubex(l, 5)
+ if rhs == nil {
+ panic("Missing subex after |")
+ }
+ lhs = SubexASTOr{lhs, rhs}
+ case r == ';' && minPower <= 2:
+ rhs := parseSubex(l, 3)
+ if rhs == nil {
+ panic("Missing subex after ;")
+ }
+ lhs = SubexASTJoin{
+ content: lhs,
+ delimiter: rhs,
+ }
+ default:
+ l.rewind()
+ break loop
+ }
+ }
+ return lhs
+}
+
+func Parse(input string) SubexAST {
+ l := RuneReader {
+ input: input,
+ pos: 0,
+ width: 0,
+ }
+ return parseSubex(&l, 0)
+}
diff --git a/subex/subexast.go b/subex/subexast.go
new file mode 100644
index 0000000..c583245
--- /dev/null
+++ b/subex/subexast.go
@@ -0,0 +1,163 @@
+package subex
+
+import (
+ "fmt"
+)
+
+type SubexAST interface {
+ compileWith(next SubexState) SubexState
+}
+
+type SubexASTConcat struct {
+ first, second SubexAST
+}
+func (ast SubexASTConcat) compileWith(next SubexState) SubexState {
+ return ast.first.compileWith(ast.second.compileWith(next))
+}
+func (ast SubexASTConcat) String() string {
+ return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)
+}
+
+type SubexASTStore struct {
+ match SubexAST
+ slot rune
+}
+func (ast SubexASTStore) compileWith(next SubexState) SubexState {
+ return &SubexStoreState {
+ match: ast.match.compileWith(&SubexNoneState{}),
+ slot: ast.slot,
+ next: next,
+ }
+}
+func (ast SubexASTStore) String() string {
+ return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)
+}
+
+type SubexASTOr struct {
+ first, second SubexAST
+}
+func (ast SubexASTOr) compileWith(next SubexState) SubexState {
+ return &SubexGroupState {
+ ast.first.compileWith(next),
+ ast.second.compileWith(next),
+ }
+}
+
+type SubexASTMaximise struct {
+ content SubexAST
+}
+func (ast SubexASTMaximise) compileWith(next SubexState) SubexState {
+ state := &SubexGroupState {
+ nil,
+ next,
+ }
+ state.first = ast.content.compileWith(state)
+ return state
+}
+func (ast SubexASTMaximise) String() string {
+ return fmt.Sprintf("(%v)*", ast.content)
+}
+
+type SubexASTMinimise struct {
+ content SubexAST
+}
+func (ast SubexASTMinimise) compileWith(next SubexState) SubexState {
+ state := &SubexGroupState {
+ next,
+ nil,
+ }
+ state.second = ast.content.compileWith(state)
+ return state
+}
+func (ast SubexASTMinimise) String() string {
+ return fmt.Sprintf("(%v)-", ast.content)
+}
+
+type SubexASTRepeat struct {
+ content SubexAST
+ min, max int
+}
+func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
+ for i := ast.min; i < ast.max; i += 1 {
+ next = &SubexGroupState {
+ ast.content.compileWith(next),
+ next,
+ }
+ }
+ for i := 0; i < ast.min; i += 1 {
+ next = ast.content.compileWith(next)
+ }
+ return next
+}
+
+type SubexASTCopyRune rune
+func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState {
+ return &SubexCopyRuneState{
+ rune: rune(ast),
+ next: next,
+ }
+}
+
+type SubexASTCopyAny struct {}
+func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState {
+ return &SubexCopyAnyState{next}
+}
+func (ast SubexASTCopyAny) String() string {
+ return "."
+}
+
+type SubexASTOutput struct {
+ replacement []TransducerOutput
+}
+func (ast SubexASTOutput) compileWith(next SubexState) SubexState {
+ return &SubexOutputState{
+ content: ast.replacement,
+ next: next,
+ }
+}
+
+type SubexASTTry struct {
+ content SubexAST
+}
+func (ast SubexASTTry) compileWith(next SubexState) SubexState {
+ return &SubexGroupState {
+ ast.content.compileWith(next),
+ next,
+ }
+}
+
+type SubexASTMaybe struct {
+ content SubexAST
+}
+func (ast SubexASTMaybe) compileWith(next SubexState) SubexState {
+ return &SubexGroupState {
+ next,
+ ast.content.compileWith(next),
+ }
+}
+
+type SubexASTJoin struct {
+ content, delimiter SubexAST
+}
+func (ast SubexASTJoin) compileWith(next SubexState) SubexState {
+ afterContentState := &SubexGroupState {
+ nil,
+ next,
+ }
+ manyContentsState := ast.content.compileWith(afterContentState)
+ afterContentState.first = ast.delimiter.compileWith(manyContentsState)
+ return &SubexGroupState {
+ manyContentsState,
+ next,
+ }
+}
+
+type SubexASTRange struct {
+ parts map[rune]rune
+}
+func (ast SubexASTRange) compileWith(next SubexState) SubexState {
+ return &SubexRangeState {
+ parts: ast.parts,
+ next: next,
+ }
+}
diff --git a/subex/subexstate.go b/subex/subexstate.go
new file mode 100644
index 0000000..2e613e8
--- /dev/null
+++ b/subex/subexstate.go
@@ -0,0 +1,145 @@
+package subex
+
+import (
+ "strings"
+)
+
+type SubexState interface {
+ eat(store Store, char rune) []SubexBranch
+ accepting(store Store) []string
+}
+
+type SubexGroupState struct {
+ first, second SubexState
+}
+func (state SubexGroupState) eat(store Store, char rune) []SubexBranch {
+ otherStore := store.clone()
+ return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
+}
+func (state SubexGroupState) accepting(store Store) []string {
+ return append(state.first.accepting(store), state.second.accepting(store)...)
+}
+
+type SubexStoreState struct {
+ match SubexState
+ slot rune
+ next SubexState
+ toStore string
+}
+func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) {
+ acceptedOutputs := state.match.accepting(store)
+ for _, acceptedOutput := range acceptedOutputs {
+ nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+ nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...)
+ }
+ nextMatchStates := state.match.eat(store.clone(), char)
+ for _, matchState := range nextMatchStates {
+ nextStates = append(nextStates, SubexBranch {
+ state: SubexStoreState {
+ match: matchState.state,
+ slot: state.slot,
+ next: state.next,
+ toStore: state.toStore + matchState.output,
+ },
+ output: "",
+ store: store.clone(),
+ })
+ }
+ return nextStates
+}
+func (state SubexStoreState) accepting(store Store) (outputs []string) {
+ acceptedOutputs := state.match.accepting(store)
+ for _, acceptedOutput := range acceptedOutputs {
+ nextStore := store.withValue(state.slot, state.toStore + acceptedOutput)
+ outputs = append(outputs, state.next.accepting(nextStore)...)
+ }
+ return outputs
+}
+
+type SubexOutputState struct {
+ content []TransducerOutput
+ next SubexState
+}
+func (state SubexOutputState) build(store Store) string {
+ var builder strings.Builder
+ for _, part := range state.content {
+ builder.WriteString(part.build(store))
+ }
+ return builder.String()
+}
+func (state SubexOutputState) eat(store Store, char rune) []SubexBranch {
+ content := state.build(store)
+ nextStates := state.next.eat(store, char)
+ for i := range nextStates {
+ nextStates[i].output = content + nextStates[i].output
+ }
+ return nextStates
+}
+func (state SubexOutputState) accepting(store Store) []string {
+ content := state.build(store)
+ outputs := state.next.accepting(store)
+ for i := range outputs {
+ outputs[i] = content + outputs[i]
+ }
+ return outputs
+}
+
+type SubexNoneState struct {}
+func (state SubexNoneState) eat(store Store, char rune) []SubexBranch {
+ return nil
+}
+func (state SubexNoneState) accepting(store Store) []string {
+ return []string{""}
+}
+
+type SubexCopyRuneState struct {
+ rune rune
+ next SubexState
+}
+func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch {
+ if char == state.rune {
+ return []SubexBranch{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+ }
+ return nil
+}
+func (state SubexCopyRuneState) accepting(store Store) []string {
+ return nil
+}
+
+type SubexCopyAnyState struct {
+ next SubexState
+}
+func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch {
+ return []SubexBranch{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+}
+func (state SubexCopyAnyState) accepting(store Store) []string {
+ return nil
+}
+
+type SubexRangeState struct {
+ parts map[rune]rune
+ next SubexState
+}
+func (state SubexRangeState) eat(store Store, char rune) []SubexBranch {
+ out, exists := state.parts[char]
+ if !exists {
+ return nil
+ } else {
+ return []SubexBranch{{
+ state: state.next,
+ output: string(out),
+ store: store,
+ }}
+ }
+}
+func (state SubexRangeState) accepting(store Store) []string {
+ return nil
+}