<- Back to shtanton's homepage
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharlie Stanton <charlie@shtanton.xyz>2022-09-21 21:05:34 +0100
committerCharlie Stanton <charlie@shtanton.xyz>2022-09-21 21:05:34 +0100
commit0a8690993d572a50b95dd4f1c1903ed00ddb9c2b (patch)
tree2ab207544c88ff19308e22c8b79c3ea349c97faa
downloadsubex-0a8690993d572a50b95dd4f1c1903ed00ddb9c2b.tar
Initial commit
Parses and executes substitute expressions (subexes) So far subex has the following operations: - Concatenation of a and b with ab - Or with | - Repeat maximally with * - Repeat minimally with - - Copy a specific character 'a' - Copy any character '.' - Store text matching a regex into slot 's': `$s(regex)` - Output text in "" including loading from slots with '$' Regexes support all the same operations as subexes minus storing and outputting This first implementation gives very little thought to efficiency Example: ./main 'according to all known laws of aviation' '$1(.-)$m(( .* )| ).*"$m$1"' This swaps the first and last words of the input string
-rw-r--r--go.mod3
-rw-r--r--main/lex.go34
-rw-r--r--main/main.go86
-rw-r--r--main/parse.go141
-rw-r--r--main/regexast.go72
-rw-r--r--main/regexstate.go48
-rw-r--r--main/subexast.go117
-rw-r--r--main/subexstate.go123
8 files changed, 624 insertions, 0 deletions
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6967dfd
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module main
+
+go 1.18
diff --git a/main/lex.go b/main/lex.go
new file mode 100644
index 0000000..c08f402
--- /dev/null
+++ b/main/lex.go
@@ -0,0 +1,34 @@
+package main
+
+import (
+ "unicode/utf8"
+)
+
+const eof rune = -1
+type RuneReader struct {
+ input string
+ pos, width int
+}
+func (l *RuneReader) next() rune {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return eof
+ }
+ var r rune
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pos += l.width
+ return r
+}
+func (l *RuneReader) accept(chars string) bool {
+ r := l.next()
+ for _, char := range chars {
+ if char == r {
+ return true
+ }
+ }
+ l.rewind()
+ return false
+}
+func (l *RuneReader) rewind() {
+ l.pos -= l.width
+}
diff --git a/main/main.go b/main/main.go
new file mode 100644
index 0000000..be43d90
--- /dev/null
+++ b/main/main.go
@@ -0,0 +1,86 @@
+package main
+
+import (
+ "os"
+ "fmt"
+)
+
+type TransducerOutput interface {
+ build(Store) string
+}
+
+type TransducerReplacementRune rune
+func (replacement TransducerReplacementRune) build(store Store) string {
+ return string(replacement)
+}
+
+type TransducerReplacementLoad rune
+func (replacement TransducerReplacementLoad) build(store Store) string {
+ return store[rune(replacement)]
+}
+
+type Store map[rune]string
+func (store Store) clone() Store {
+ newStore := make(Store)
+ for key, val := range store {
+ newStore[key] = val
+ }
+ return newStore
+}
+
+func compileTransducer(transducerAst SubexAST) SubexState {
+ return transducerAst.compileWith(SubexNoneState{})
+}
+
+type SubexBranch struct {
+ store Store
+ state SubexState
+ output string
+}
+func (pair SubexBranch) eat(char rune) []SubexBranch {
+ states := pair.state.eat(pair.store, char)
+ for i := range states {
+ states[i].output = pair.output + states[i].output
+ }
+ return states
+}
+func (pair SubexBranch) accepting() []string {
+ return pair.state.accepting(pair.store)
+}
+
+func runTransducer(transducer SubexState, input string) (output string, err bool) {
+ states := []SubexBranch{{
+ state: transducer,
+ output: "",
+ store: make(Store),
+ }}
+ for _, char := range input {
+ var newStates []SubexBranch
+ for _, state := range states {
+ newStates = append(newStates, state.eat(char)...)
+ }
+ states = newStates
+ }
+ for _, state := range states {
+ outputEnds := state.accepting()
+ for _, outputEnd := range outputEnds {
+ return state.output + outputEnd, false
+ }
+ }
+ return "", true
+}
+
+func main() {
+ if len(os.Args) != 3 {
+ panic("Expected: program [input] [subex]")
+ }
+ input := os.Args[1]
+ program := os.Args[2]
+ ast := parse(program)
+ transducer := compileTransducer(ast)
+ output, err := runTransducer(transducer, input)
+ if err {
+ output = input
+ }
+ fmt.Println(output)
+}
diff --git a/main/parse.go b/main/parse.go
new file mode 100644
index 0000000..a9bd4b5
--- /dev/null
+++ b/main/parse.go
@@ -0,0 +1,141 @@
+package main
+
+func parseReplacement(l *RuneReader) (output []TransducerOutput) {
+ loop: for {
+ r := l.next()
+ switch r {
+ case eof:
+ panic("Missing closing \"")
+ case '"':
+ break loop
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ output = append(output, TransducerReplacementLoad(slot))
+ default:
+ output = append(output, TransducerReplacementRune(r))
+ }
+ }
+ return output
+}
+
+func parseRegex(l *RuneReader, minPower int) RegexAST {
+ var lhs RegexAST
+ r := l.next()
+ switch r {
+ case eof:
+ return nil
+ case ')', '*', '-', '|':
+ l.rewind()
+ return nil
+ case '(':
+ lhs = parseRegex(l, 0)
+ if !l.accept(")") {
+ panic("Missing matching )")
+ }
+ case '.':
+ lhs = RegexASTAny{}
+ default:
+ lhs = RegexASTRune(r)
+ }
+ loop: for {
+ if minPower <= 0 {
+ next := parseRegex(l, 1)
+ if next != nil {
+ lhs = RegexASTConcat{lhs, next}
+ continue loop
+ }
+ }
+ r := l.next()
+ switch {
+ case r == '*' && minPower <= 4:
+ lhs = RegexASTMaximise{lhs}
+ case r == '-' && minPower <= 4:
+ lhs = RegexASTMinimise{lhs}
+ case r == '|' && minPower <= 2:
+ rhs := parseRegex(l, 3)
+ if rhs == nil {
+ panic("Missing regex after |")
+ }
+ lhs = RegexASTOr{lhs, rhs}
+ default:
+ l.rewind()
+ break loop
+ }
+ }
+ return lhs
+}
+
+func parseSubex(l *RuneReader, minPower int) SubexAST {
+ var lhs SubexAST
+ r := l.next()
+ switch r {
+ case eof:
+ return nil
+ case '(':
+ lhs = parseSubex(l, 0)
+ if !l.accept(")") {
+ panic("Missing matching )")
+ }
+ case ')', '*', '-', '|':
+ l.rewind()
+ return nil
+ case '$':
+ slot := l.next()
+ if slot == eof {
+ panic("Missing slot character")
+ }
+ match := parseRegex(l, 100)
+ if match == nil {
+ panic("Missing regex for store")
+ }
+ lhs = SubexASTStore{
+ match: match,
+ slot: slot,
+ }
+ case '"':
+ replacement := parseReplacement(l)
+ lhs = SubexASTOutput{replacement}
+ case '.':
+ lhs = SubexASTCopyAny{}
+ default:
+ lhs = SubexASTCopyRune(r)
+ }
+ loop: for {
+ if minPower <= 0 {
+ next := parseSubex(l, 1)
+ if next != nil {
+ lhs = SubexASTConcat{lhs, next}
+ continue loop
+ }
+ }
+ r := l.next()
+ switch {
+ case r == '*' && minPower <= 4:
+ lhs = SubexASTMaximise{lhs}
+ case r == '-' && minPower <= 4:
+ lhs = SubexASTMinimise{lhs}
+ case r == '|' && minPower <= 2:
+ rhs := parseSubex(l, 3)
+ if rhs == nil {
+ panic("Missing subex after |")
+ }
+ lhs = SubexASTOr{lhs, rhs}
+ default:
+ l.rewind()
+ break loop
+ }
+ }
+ return lhs
+}
+
+func parse(input string) SubexAST {
+ l := RuneReader {
+ input: input,
+ pos: 0,
+ width: 0,
+ }
+ return parseSubex(&l, 0)
+}
diff --git a/main/regexast.go b/main/regexast.go
new file mode 100644
index 0000000..0aab053
--- /dev/null
+++ b/main/regexast.go
@@ -0,0 +1,72 @@
+package main
+
+import (
+ "fmt"
+)
+
+type RegexAST interface {
+ compileWith(next RegexState) RegexState
+}
+
+type RegexASTRune rune
+func (ast RegexASTRune) compileWith(next RegexState) RegexState {
+ return RegexRuneState{
+ rune: rune(ast),
+ next: next,
+ }
+}
+func (ast RegexASTRune) String() string {
+ return string(rune(ast))
+}
+
+type RegexASTAny struct {}
+func (ast RegexASTAny) compileWith(next RegexState) RegexState {
+ return RegexAnyState{next}
+}
+func (ast RegexASTAny) String() string {
+ return "."
+}
+
+type RegexASTConcat struct {
+ first, second RegexAST
+}
+func (ast RegexASTConcat) compileWith(next RegexState) RegexState {
+ return ast.first.compileWith(ast.second.compileWith(next))
+}
+func (ast RegexASTConcat) String() string {
+ return fmt.Sprintf("Concat{%v, %v}", ast.first, ast.second)
+}
+
+type RegexASTOr struct {
+ first, second RegexAST
+}
+func (ast RegexASTOr) compileWith(next RegexState) RegexState {
+ return RegexGroupState{
+ ast.first.compileWith(next),
+ ast.second.compileWith(next),
+ }
+}
+
+type RegexASTMaximise struct {
+ content RegexAST
+}
+func (ast RegexASTMaximise) compileWith(next RegexState) RegexState {
+ state := &RegexGroupState{
+ nil,
+ next,
+ }
+ state.first = ast.content.compileWith(state)
+ return state
+}
+
+type RegexASTMinimise struct {
+ content RegexAST
+}
+func (ast RegexASTMinimise) compileWith(next RegexState) RegexState {
+ state := &RegexGroupState{
+ next,
+ nil,
+ }
+ state.second = ast.content.compileWith(state)
+ return state
+}
diff --git a/main/regexstate.go b/main/regexstate.go
new file mode 100644
index 0000000..16d5581
--- /dev/null
+++ b/main/regexstate.go
@@ -0,0 +1,48 @@
+package main
+
+type RegexState interface {
+ eat(char rune) []RegexState
+ accepting() bool
+}
+
+type RegexNoneState struct {}
+func (state RegexNoneState) eat(char rune) []RegexState {
+ return nil
+}
+func (state RegexNoneState) accepting() bool {
+ return true
+}
+
+type RegexAnyState struct {
+ next RegexState
+}
+func (state RegexAnyState) eat(char rune) []RegexState {
+ return []RegexState{state.next}
+}
+func (state RegexAnyState) accepting() bool {
+ return false
+}
+
+type RegexRuneState struct {
+ rune rune
+ next RegexState
+}
+func (state RegexRuneState) eat(char rune) []RegexState {
+ if char == state.rune {
+ return []RegexState{state.next}
+ }
+ return nil
+}
+func (state RegexRuneState) accepting() bool {
+ return false
+}
+
+type RegexGroupState struct {
+ first, second RegexState
+}
+func (state RegexGroupState) eat(char rune) []RegexState {
+ return append(state.first.eat(char), state.second.eat(char)...)
+}
+func (state RegexGroupState) accepting() bool {
+ return state.first.accepting() || state.second.accepting()
+}
diff --git a/main/subexast.go b/main/subexast.go
new file mode 100644
index 0000000..7e2f33c
--- /dev/null
+++ b/main/subexast.go
@@ -0,0 +1,117 @@
+package main
+
+import (
+ "fmt"
+)
+
+type SubexAST interface {
+ compileWith(next SubexState) SubexState
+}
+
+type SubexASTConcat struct {
+ first, second SubexAST
+}
+func (ast SubexASTConcat) compileWith(next SubexState) SubexState {
+ return ast.first.compileWith(ast.second.compileWith(next))
+}
+func (ast SubexASTConcat) String() string {
+ return fmt.Sprintf("(%v)(%v)", ast.first, ast.second)
+}
+
+type SubexASTStore struct {
+ match RegexAST
+ slot rune
+}
+func (ast SubexASTStore) compileWith(next SubexState) SubexState {
+ return SubexStoreState {
+ match: ast.match.compileWith(RegexNoneState{}),
+ slot: ast.slot,
+ next: next,
+ }
+}
+func (ast SubexASTStore) String() string {
+ return fmt.Sprintf("$%c(%v)", ast.slot, ast.match)
+}
+
+type SubexASTOr struct {
+ first, second SubexAST
+}
+func (ast SubexASTOr) compileWith(next SubexState) SubexState {
+ return SubexGroupState {
+ ast.first.compileWith(next),
+ ast.second.compileWith(next),
+ }
+}
+
+type SubexASTMaximise struct {
+ content SubexAST
+}
+func (ast SubexASTMaximise) compileWith(next SubexState) SubexState {
+ state := &SubexGroupState {
+ nil,
+ next,
+ }
+ state.first = ast.content.compileWith(state)
+ return state
+}
+func (ast SubexASTMaximise) String() string {
+ return fmt.Sprintf("(%v)*", ast.content)
+}
+
+type SubexASTMinimise struct {
+ content SubexAST
+}
+func (ast SubexASTMinimise) compileWith(next SubexState) SubexState {
+ state := &SubexGroupState {
+ next,
+ nil,
+ }
+ state.second = ast.content.compileWith(state)
+ return state
+}
+func (ast SubexASTMinimise) String() string {
+ return fmt.Sprintf("(%v)-", ast.content)
+}
+
+type SubexASTRepeat struct {
+ content SubexAST
+ min, max int
+}
+func (ast SubexASTRepeat) compileWith(next SubexState) SubexState {
+ for i := ast.min; i < ast.max; i += 1 {
+ next = SubexGroupState {
+ ast.content.compileWith(next),
+ next,
+ }
+ }
+ for i := 0; i < ast.min; i += 1 {
+ next = ast.content.compileWith(next)
+ }
+ return next
+}
+
+type SubexASTCopyRune rune
+func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState {
+ return SubexCopyRuneState{
+ rune: rune(ast),
+ next: next,
+ }
+}
+
+type SubexASTCopyAny struct {}
+func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState {
+ return SubexCopyAnyState{next}
+}
+func (ast SubexASTCopyAny) String() string {
+ return "."
+}
+
+type SubexASTOutput struct {
+ replacement []TransducerOutput
+}
+func (ast SubexASTOutput) compileWith(next SubexState) SubexState {
+ return SubexOutputState{
+ content: ast.replacement,
+ next: next,
+ }
+}
diff --git a/main/subexstate.go b/main/subexstate.go
new file mode 100644
index 0000000..cc697f0
--- /dev/null
+++ b/main/subexstate.go
@@ -0,0 +1,123 @@
+package main
+
+import (
+ "strings"
+)
+
+type SubexState interface {
+ eat(store Store, char rune) []SubexBranch
+ accepting(store Store) []string
+}
+
+type SubexGroupState struct {
+ first, second SubexState
+}
+func (state SubexGroupState) eat(store Store, char rune) []SubexBranch {
+ otherStore := store.clone()
+ return append(state.first.eat(store, char), state.second.eat(otherStore, char)...)
+}
+func (state SubexGroupState) accepting(store Store) []string {
+ return append(state.first.accepting(store), state.second.accepting(store)...)
+}
+
+type SubexStoreState struct {
+ match RegexState
+ slot rune
+ next SubexState
+ input string
+}
+func (state SubexStoreState) eat(store Store, char rune) []SubexBranch {
+ var nextStates []SubexBranch
+ if state.match.accepting() {
+ store[state.slot] = state.input
+ nextStates = state.next.eat(store, char)
+ }
+ nextRegexStates := state.match.eat(char)
+ for _, regexState := range nextRegexStates {
+ nextStates = append(nextStates, SubexBranch {
+ state: SubexStoreState {
+ match: regexState,
+ slot: state.slot,
+ next: state.next,
+ input: state.input + string(char),
+ },
+ output: "",
+ store: store.clone(),
+ })
+ }
+ return nextStates
+}
+func (state SubexStoreState) accepting(store Store) []string {
+ if state.match.accepting() {
+ return state.next.accepting(store)
+ }
+ return nil
+}
+
+type SubexOutputState struct {
+ content []TransducerOutput
+ next SubexState
+}
+func (state SubexOutputState) build(store Store) string {
+ var builder strings.Builder
+ for _, part := range state.content {
+ builder.WriteString(part.build(store))
+ }
+ return builder.String()
+}
+func (state SubexOutputState) eat(store Store, char rune) []SubexBranch {
+ content := state.build(store)
+ nextStates := state.next.eat(store, char)
+ for i := range nextStates {
+ nextStates[i].output = content + nextStates[i].output
+ }
+ return nextStates
+}
+func (state SubexOutputState) accepting(store Store) []string {
+ content := state.build(store)
+ outputs := state.next.accepting(store)
+ for i := range outputs {
+ outputs[i] = content + outputs[i]
+ }
+ return outputs
+}
+
+type SubexNoneState struct {}
+func (state SubexNoneState) eat(store Store, char rune) []SubexBranch {
+ return nil
+}
+func (state SubexNoneState) accepting(store Store) []string {
+ return []string{""}
+}
+
+type SubexCopyRuneState struct {
+ rune rune
+ next SubexState
+}
+func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch {
+ if char == state.rune {
+ return []SubexBranch{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+ }
+ return nil
+}
+func (state SubexCopyRuneState) accepting(store Store) []string {
+ return nil
+}
+
+type SubexCopyAnyState struct {
+ next SubexState
+}
+func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch {
+ return []SubexBranch{{
+ state: state.next,
+ output: string(char),
+ store: store,
+ }}
+}
+func (state SubexCopyAnyState) accepting(store Store) []string {
+ return nil
+}