From fba426b3910f16c8abc6f819da3138f03e5f0b1a Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Sun, 19 Feb 2023 08:59:16 +0000 Subject: Introduces subex processing Doesn't integrate it at all yet --- subex/lex.go | 34 ++++++++++ subex/main.go | 114 ++++++++++++++++++++++++++++++++++ subex/parse.go | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++ subex/subexast.go | 163 ++++++++++++++++++++++++++++++++++++++++++++++++ subex/subexstate.go | 145 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 631 insertions(+) create mode 100644 subex/lex.go create mode 100644 subex/main.go create mode 100644 subex/parse.go create mode 100644 subex/subexast.go create mode 100644 subex/subexstate.go (limited to 'subex') diff --git a/subex/lex.go b/subex/lex.go new file mode 100644 index 0000000..f020b23 --- /dev/null +++ b/subex/lex.go @@ -0,0 +1,34 @@ +package subex + +import ( + "unicode/utf8" +) + +const eof rune = -1 +type RuneReader struct { + input string + pos, width int +} +func (l *RuneReader) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} +func (l *RuneReader) accept(chars string) bool { + r := l.next() + for _, char := range chars { + if char == r { + return true + } + } + l.rewind() + return false +} +func (l *RuneReader) rewind() { + l.pos -= l.width +} diff --git a/subex/main.go b/subex/main.go new file mode 100644 index 0000000..3ae0618 --- /dev/null +++ b/subex/main.go @@ -0,0 +1,114 @@ +package subex + +import ( + "os" + "fmt" + "io" +) + +type TransducerOutput interface { + build(Store) string +} + +type TransducerReplacementRune rune +func (replacement TransducerReplacementRune) build(store Store) string { + return string(replacement) +} + +type TransducerReplacementLoad rune +func (replacement TransducerReplacementLoad) build(store Store) string { + return store[rune(replacement)] +} + +type Store map[rune]string +func (store Store) clone() Store { + newStore := make(Store) + for key, val := range store { + newStore[key] = val + } + return newStore +} +func (store Store) withValue(key rune, value string) Store { + newStore := store.clone() + newStore[key] = value + return newStore +} + +func CompileTransducer(transducerAst SubexAST) SubexState { + return transducerAst.compileWith(SubexNoneState{}) +} + +type SubexBranch struct { + store Store + state SubexState + output string +} +func (pair SubexBranch) eat(char rune) []SubexBranch { + states := pair.state.eat(pair.store, char) + for i := range states { + states[i].output = pair.output + states[i].output + } + return states +} +func (pair SubexBranch) accepting() []string { + return pair.state.accepting(pair.store) +} + +func equalStates(left SubexBranch, right SubexBranch) bool { + // Only care about if they are the same pointer + return left.state == right.state +} + +func pruneStates(states []SubexBranch) (newStates []SubexBranch) { + outer: for _, state := range states { + for _, newState := range newStates { + if equalStates(state, newState) { + continue outer + } + } + newStates = append(newStates, state) + } + return newStates +} + +func RunTransducer(transducer SubexState, input string) (output string, err bool) { + states := []SubexBranch{{ + state: transducer, + output: "", + store: make(Store), + }} + for _, char := range input { + fmt.Printf("Running with %d states\n", len(states)) + var newStates []SubexBranch + for _, state := range states { + newStates = append(newStates, state.eat(char)...) + } + states = pruneStates(newStates) + } + for _, state := range states { + outputEnds := state.accepting() + for _, outputEnd := range outputEnds { + return state.output + outputEnd, false + } + } + return "", true +} + +func Main() { + if len(os.Args) != 2 { + panic("Expected: program [subex]") + } + inputBytes, inputErr := io.ReadAll(os.Stdin) + input := string(inputBytes) + if inputErr != nil { + fmt.Println("Error reading") + } + program := os.Args[1] + ast := Parse(program) + transducer := CompileTransducer(ast) + output, err := RunTransducer(transducer, input) + if err { + output = input + } + fmt.Print(output) +} diff --git a/subex/parse.go b/subex/parse.go new file mode 100644 index 0000000..af575eb --- /dev/null +++ b/subex/parse.go @@ -0,0 +1,175 @@ +package subex + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { + loop: for { + r := l.next() + switch r { + case eof: + panic("Missing closing \"") + case '"': + break loop + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + output = append(output, TransducerReplacementLoad(slot)) + default: + output = append(output, TransducerReplacementRune(r)) + } + } + return output +} + +func parseRangeSubex(l *RuneReader) map[rune]rune { + parts := make(map[rune]rune) + var froms []rune + var hasTo bool + for { + fromsStart := l.next() + if fromsStart == ']' { + hasTo = false + break + } else if fromsStart == '=' { + hasTo = true + break + } + var fromsEnd rune + if l.accept("-") { + fromsEnd = l.next() + if fromsEnd == ']' || fromsEnd == '=' { + l.rewind() + fromsEnd = fromsStart + } + } else { + fromsEnd = fromsStart + } + for i := fromsStart; i <= fromsEnd; i += 1 { + froms = append(froms, i) + } + } + if len(froms) == 0 { + panic("Missing from part of range expression") + } + + var tos []rune + if hasTo { + for { + tosStart := l.next() + if tosStart == ']' { + break + } + var tosEnd rune + if l.accept("-") { + tosEnd = l.next() + if tosEnd == ']' { + l.rewind() + tosEnd = tosStart + } + } else { + tosEnd = tosStart + } + for i := tosStart; i <= tosEnd; i += 1 { + tos = append(tos, i) + } + } + } else { + tos = froms + } + if len(tos) == 0 { + panic("Missing to part of range expression") + } + + for i, from := range froms { + parts[from] = tos[i % len(tos)] + } + return parts +} + +func parseSubex(l *RuneReader, minPower int) SubexAST { + var lhs SubexAST + r := l.next() + switch r { + case eof: + return nil + case '(': + lhs = parseSubex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case '[': + rangeParts := parseRangeSubex(l) + lhs = SubexASTRange {rangeParts} + case ')', '*', '-', '|', '!', '?', ';': + l.rewind() + return nil + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + match := parseSubex(l, 100) + if match == nil { + panic("Missing regex for store") + } + lhs = SubexASTStore{ + match: match, + slot: slot, + } + case '"': + replacement := parseReplacement(l) + lhs = SubexASTOutput{replacement} + case '.': + lhs = SubexASTCopyAny{} + default: + lhs = SubexASTCopyRune(r) + } + loop: for { + if minPower <= 0 { + next := parseSubex(l, 1) + if next != nil { + lhs = SubexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 8: + lhs = SubexASTMaximise{lhs} + case r == '-' && minPower <= 8: + lhs = SubexASTMinimise{lhs} + case r == '!' && minPower <= 8: + lhs = SubexASTTry{lhs} + case r == '?' && minPower <= 8: + lhs = SubexASTMaybe{lhs} + case r == '|' && minPower <= 4: + rhs := parseSubex(l, 5) + if rhs == nil { + panic("Missing subex after |") + } + lhs = SubexASTOr{lhs, rhs} + case r == ';' && minPower <= 2: + rhs := parseSubex(l, 3) + if rhs == nil { + panic("Missing subex after ;") + } + lhs = SubexASTJoin{ + content: lhs, + delimiter: rhs, + } + default: + l.rewind() + break loop + } + } + return lhs +} + +func Parse(input string) SubexAST { + l := RuneReader { + input: input, + pos: 0, + width: 0, + } + return parseSubex(&l, 0) +} diff --git a/subex/subexast.go b/subex/subexast.go new file mode 100644 index 0000000..c583245 --- /dev/null +++ b/subex/subexast.go @@ -0,0 +1,163 @@ +package subex + +import ( + "fmt" +) + +type SubexAST interface { + compileWith(next SubexState) SubexState +} + +type SubexASTConcat struct { + first, second SubexAST +} +func (ast SubexASTConcat) compileWith(next SubexState) SubexState { + return ast.first.compileWith(ast.second.compileWith(next)) +} +func (ast SubexASTConcat) String() string { + return fmt.Sprintf("(%v)(%v)", ast.first, ast.second) +} + +type SubexASTStore struct { + match SubexAST + slot rune +} +func (ast SubexASTStore) compileWith(next SubexState) SubexState { + return &SubexStoreState { + match: ast.match.compileWith(&SubexNoneState{}), + slot: ast.slot, + next: next, + } +} +func (ast SubexASTStore) String() string { + return fmt.Sprintf("$%c(%v)", ast.slot, ast.match) +} + +type SubexASTOr struct { + first, second SubexAST +} +func (ast SubexASTOr) compileWith(next SubexState) SubexState { + return &SubexGroupState { + ast.first.compileWith(next), + ast.second.compileWith(next), + } +} + +type SubexASTMaximise struct { + content SubexAST +} +func (ast SubexASTMaximise) compileWith(next SubexState) SubexState { + state := &SubexGroupState { + nil, + next, + } + state.first = ast.content.compileWith(state) + return state +} +func (ast SubexASTMaximise) String() string { + return fmt.Sprintf("(%v)*", ast.content) +} + +type SubexASTMinimise struct { + content SubexAST +} +func (ast SubexASTMinimise) compileWith(next SubexState) SubexState { + state := &SubexGroupState { + next, + nil, + } + state.second = ast.content.compileWith(state) + return state +} +func (ast SubexASTMinimise) String() string { + return fmt.Sprintf("(%v)-", ast.content) +} + +type SubexASTRepeat struct { + content SubexAST + min, max int +} +func (ast SubexASTRepeat) compileWith(next SubexState) SubexState { + for i := ast.min; i < ast.max; i += 1 { + next = &SubexGroupState { + ast.content.compileWith(next), + next, + } + } + for i := 0; i < ast.min; i += 1 { + next = ast.content.compileWith(next) + } + return next +} + +type SubexASTCopyRune rune +func (ast SubexASTCopyRune) compileWith(next SubexState) SubexState { + return &SubexCopyRuneState{ + rune: rune(ast), + next: next, + } +} + +type SubexASTCopyAny struct {} +func (ast SubexASTCopyAny) compileWith(next SubexState) SubexState { + return &SubexCopyAnyState{next} +} +func (ast SubexASTCopyAny) String() string { + return "." +} + +type SubexASTOutput struct { + replacement []TransducerOutput +} +func (ast SubexASTOutput) compileWith(next SubexState) SubexState { + return &SubexOutputState{ + content: ast.replacement, + next: next, + } +} + +type SubexASTTry struct { + content SubexAST +} +func (ast SubexASTTry) compileWith(next SubexState) SubexState { + return &SubexGroupState { + ast.content.compileWith(next), + next, + } +} + +type SubexASTMaybe struct { + content SubexAST +} +func (ast SubexASTMaybe) compileWith(next SubexState) SubexState { + return &SubexGroupState { + next, + ast.content.compileWith(next), + } +} + +type SubexASTJoin struct { + content, delimiter SubexAST +} +func (ast SubexASTJoin) compileWith(next SubexState) SubexState { + afterContentState := &SubexGroupState { + nil, + next, + } + manyContentsState := ast.content.compileWith(afterContentState) + afterContentState.first = ast.delimiter.compileWith(manyContentsState) + return &SubexGroupState { + manyContentsState, + next, + } +} + +type SubexASTRange struct { + parts map[rune]rune +} +func (ast SubexASTRange) compileWith(next SubexState) SubexState { + return &SubexRangeState { + parts: ast.parts, + next: next, + } +} diff --git a/subex/subexstate.go b/subex/subexstate.go new file mode 100644 index 0000000..2e613e8 --- /dev/null +++ b/subex/subexstate.go @@ -0,0 +1,145 @@ +package subex + +import ( + "strings" +) + +type SubexState interface { + eat(store Store, char rune) []SubexBranch + accepting(store Store) []string +} + +type SubexGroupState struct { + first, second SubexState +} +func (state SubexGroupState) eat(store Store, char rune) []SubexBranch { + otherStore := store.clone() + return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) +} +func (state SubexGroupState) accepting(store Store) []string { + return append(state.first.accepting(store), state.second.accepting(store)...) +} + +type SubexStoreState struct { + match SubexState + slot rune + next SubexState + toStore string +} +func (state SubexStoreState) eat(store Store, char rune) (nextStates []SubexBranch) { + acceptedOutputs := state.match.accepting(store) + for _, acceptedOutput := range acceptedOutputs { + nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + nextStates = append(nextStates, state.next.eat(nextStore.clone(), char)...) + } + nextMatchStates := state.match.eat(store.clone(), char) + for _, matchState := range nextMatchStates { + nextStates = append(nextStates, SubexBranch { + state: SubexStoreState { + match: matchState.state, + slot: state.slot, + next: state.next, + toStore: state.toStore + matchState.output, + }, + output: "", + store: store.clone(), + }) + } + return nextStates +} +func (state SubexStoreState) accepting(store Store) (outputs []string) { + acceptedOutputs := state.match.accepting(store) + for _, acceptedOutput := range acceptedOutputs { + nextStore := store.withValue(state.slot, state.toStore + acceptedOutput) + outputs = append(outputs, state.next.accepting(nextStore)...) + } + return outputs +} + +type SubexOutputState struct { + content []TransducerOutput + next SubexState +} +func (state SubexOutputState) build(store Store) string { + var builder strings.Builder + for _, part := range state.content { + builder.WriteString(part.build(store)) + } + return builder.String() +} +func (state SubexOutputState) eat(store Store, char rune) []SubexBranch { + content := state.build(store) + nextStates := state.next.eat(store, char) + for i := range nextStates { + nextStates[i].output = content + nextStates[i].output + } + return nextStates +} +func (state SubexOutputState) accepting(store Store) []string { + content := state.build(store) + outputs := state.next.accepting(store) + for i := range outputs { + outputs[i] = content + outputs[i] + } + return outputs +} + +type SubexNoneState struct {} +func (state SubexNoneState) eat(store Store, char rune) []SubexBranch { + return nil +} +func (state SubexNoneState) accepting(store Store) []string { + return []string{""} +} + +type SubexCopyRuneState struct { + rune rune + next SubexState +} +func (state SubexCopyRuneState) eat(store Store, char rune) []SubexBranch { + if char == state.rune { + return []SubexBranch{{ + state: state.next, + output: string(char), + store: store, + }} + } + return nil +} +func (state SubexCopyRuneState) accepting(store Store) []string { + return nil +} + +type SubexCopyAnyState struct { + next SubexState +} +func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch { + return []SubexBranch{{ + state: state.next, + output: string(char), + store: store, + }} +} +func (state SubexCopyAnyState) accepting(store Store) []string { + return nil +} + +type SubexRangeState struct { + parts map[rune]rune + next SubexState +} +func (state SubexRangeState) eat(store Store, char rune) []SubexBranch { + out, exists := state.parts[char] + if !exists { + return nil + } else { + return []SubexBranch{{ + state: state.next, + output: string(out), + store: store, + }} + } +} +func (state SubexRangeState) accepting(store Store) []string { + return nil +} -- cgit v1.2.3