From b81f564164a512fc3dba155c7bd25613b201c070 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 21 Sep 2022 19:37:22 +0100 Subject: Implements the first version of subex --- main/main.go | 564 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 564 insertions(+) (limited to 'main/main.go') diff --git a/main/main.go b/main/main.go index 5503fb1..7bb5152 100644 --- a/main/main.go +++ b/main/main.go @@ -3,6 +3,9 @@ package main import ( "os" "bufio" + "fmt" + "strings" + "unicode/utf8" ) type PathSegment interface {} @@ -36,7 +39,568 @@ type ProgramState struct { program []Command } +//const eof rune = -1 +type RuneReader struct { + input string + pos, width int +} +func (l *RuneReader) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} +func (l *RuneReader) accept(chars string) bool { + r := l.next() + for _, char := range chars { + if char == r { + return true + } + } + l.rewind() + return false +} +func (l *RuneReader) rewind() { + l.pos -= l.width +} + +func parseReplacement(l *RuneReader) (output []TransducerOutput) { + loop: for { + r := l.next() + switch r { + case eof: + panic("Missing closing \"") + case '"': + break loop + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + output = append(output, TransducerReplacementLoad(slot)) + default: + output = append(output, TransducerReplacementRune(r)) + } + } + return output +} + +func parseRegex(l *RuneReader, minPower int) RegexAST { + var lhs RegexAST + r := l.next() + switch r { + case eof: + return nil + case ')', '*', '-': + l.rewind() + return nil + case '(': + lhs = parseRegex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case '.': + lhs = RegexASTAny{} + default: + lhs = RegexASTRune(r) + } + loop: for { + if minPower <= 0 { + next := parseRegex(l, 1) + if next != nil { + lhs = RegexASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = RegexASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = RegexASTMinimise{lhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parseSubex(l *RuneReader, minPower int) TransducerAST { + var lhs TransducerAST + r := l.next() + switch r { + case eof: + return nil + case '(': + lhs = parseSubex(l, 0) + if !l.accept(")") { + panic("Missing matching )") + } + case ')', '*', '-': + l.rewind() + return nil + case '$': + slot := l.next() + if slot == eof { + panic("Missing slot character") + } + match := parseRegex(l, 100) + if match == nil { + panic("Missing regex for store") + } + lhs = TransducerASTStore{ + match: match, + slot: slot, + } + case '"': + replacement := parseReplacement(l) + lhs = TransducerASTOutput{replacement} + case '.': + lhs = TransducerASTCopyAny{} + default: + lhs = TransducerASTCopyRune(r) + } + loop: for { + if minPower <= 0 { + next := parseSubex(l, 1) + if next != nil { + lhs = TransducerASTConcat{lhs, next} + continue loop + } + } + r := l.next() + switch { + case r == '*' && minPower <= 4: + lhs = TransducerASTMaximise{lhs} + case r == '-' && minPower <= 4: + lhs = TransducerASTMinimise{lhs} + default: + l.rewind() + break loop + } + } + return lhs +} + +func parse(input string) TransducerAST { + l := RuneReader { + input: input, + pos: 0, + width: 0, + } + return parseSubex(&l, 0) +} + +type TransducerOutput interface { + build(Store) string +} + +type TransducerReplacementRune rune +func (replacement TransducerReplacementRune) build(store Store) string { + return string(replacement) +} + +type TransducerReplacementLoad rune +func (replacement TransducerReplacementLoad) build(store Store) string { + return store[rune(replacement)] +} + +type RegexState interface { + eat(char rune) []RegexState + accepting() bool +} + +type RegexNoneState struct {} +func (state RegexNoneState) eat(char rune) []RegexState { + return nil +} +func (state RegexNoneState) accepting() bool { + return true +} + +type RegexAnyState struct { + next RegexState +} +func (state RegexAnyState) eat(char rune) []RegexState { + return []RegexState{state.next} +} +func (state RegexAnyState) accepting() bool { + return false +} + +type RegexRuneState struct { + rune rune + next RegexState +} +func (state RegexRuneState) eat(char rune) []RegexState { + if char == state.rune { + return []RegexState{state.next} + } + return nil +} +func (state RegexRuneState) accepting() bool { + return false +} + +type RegexGroupState struct { + first, second RegexState +} +func (state RegexGroupState) eat(char rune) []RegexState { + return append(state.first.eat(char), state.second.eat(char)...) +} +func (state RegexGroupState) accepting() bool { + return state.first.accepting() || state.second.accepting() +} + +type RegexAST interface { + compileWith(next RegexState) RegexState +} + +type RegexASTRune rune +func (ast RegexASTRune) compileWith(next RegexState) RegexState { + return RegexRuneState{ + rune: rune(ast), + next: next, + } +} +func (ast RegexASTRune) String() string { + return string(rune(ast)) +} + +type RegexASTAny struct {} +func (ast RegexASTAny) compileWith(next RegexState) RegexState { + return RegexAnyState{next} +} +func (ast RegexASTAny) String() string { + return "." +} + +type RegexASTConcat struct { + first, second RegexAST +} +func (ast RegexASTConcat) compileWith(next RegexState) RegexState { + return ast.first.compileWith(ast.second.compileWith(next)) +} +func (ast RegexASTConcat) String() string { + return fmt.Sprintf("Concat{%v, %v}", ast.first, ast.second) +} + +type RegexASTMaximise struct { + content RegexAST +} +func (ast RegexASTMaximise) compileWith(next RegexState) RegexState { + state := &RegexGroupState{ + nil, + next, + } + state.first = ast.content.compileWith(state) + return state +} + +type RegexASTMinimise struct { + content RegexAST +} +func (ast RegexASTMinimise) compileWith(next RegexState) RegexState { + state := &RegexGroupState{ + next, + nil, + } + state.second = ast.content.compileWith(state) + return state +} + +type Store map[rune]string +func (store Store) clone() Store { + newStore := make(Store) + for key, val := range store { + newStore[key] = val + } + return newStore +} + +type TransducerState interface { + eat(store Store, char rune) []TransducerStatePair + accepting(store Store) []string +} + +type TransducerGroupState struct { + first, second TransducerState +} +func (state TransducerGroupState) eat(store Store, char rune) []TransducerStatePair { + otherStore := store.clone() + return append(state.first.eat(store, char), state.second.eat(otherStore, char)...) +} +func (state TransducerGroupState) accepting(store Store) []string { + return append(state.first.accepting(store), state.second.accepting(store)...) +} + +type TransducerStoreState struct { + match RegexState + slot rune + next TransducerState + input string +} +func (state TransducerStoreState) eat(store Store, char rune) []TransducerStatePair { + var nextStates []TransducerStatePair + if state.match.accepting() { + store[state.slot] = state.input + nextStates = state.next.eat(store, char) + } + nextRegexStates := state.match.eat(char) + for _, regexState := range nextRegexStates { + nextStates = append(nextStates, TransducerStatePair { + state: TransducerStoreState { + match: regexState, + slot: state.slot, + next: state.next, + input: state.input + string(char), + }, + output: "", + store: store.clone(), + }) + } + return nextStates +} +func (state TransducerStoreState) accepting(store Store) []string { + if state.match.accepting() { + return state.next.accepting(store) + } + return nil +} + +type TransducerOutputState struct { + content []TransducerOutput + next TransducerState +} +func (state TransducerOutputState) build(store Store) string { + var builder strings.Builder + for _, part := range state.content { + builder.WriteString(part.build(store)) + } + return builder.String() +} +func (state TransducerOutputState) eat(store Store, char rune) []TransducerStatePair { + content := state.build(store) + nextStates := state.next.eat(store, char) + for i := range nextStates { + nextStates[i].output = content + nextStates[i].output + } + return nextStates +} +func (state TransducerOutputState) accepting(store Store) []string { + content := state.build(store) + outputs := state.next.accepting(store) + for i := range outputs { + outputs[i] = content + outputs[i] + } + return outputs +} + +type TransducerNoneState struct {} +func (state TransducerNoneState) eat(store Store, char rune) []TransducerStatePair { + return nil +} +func (state TransducerNoneState) accepting(store Store) []string { + return []string{""} +} + +type TransducerCopyRuneState struct { + rune rune + next TransducerState +} +func (state TransducerCopyRuneState) eat(store Store, char rune) []TransducerStatePair { + if char == state.rune { + return []TransducerStatePair{{ + state: state.next, + output: string(char), + store: store, + }} + } + return nil +} +func (state TransducerCopyRuneState) accepting(store Store) []string { + return nil +} + +type TransducerCopyAnyState struct { + next TransducerState +} +func (state TransducerCopyAnyState) eat(store Store, char rune) []TransducerStatePair { + return []TransducerStatePair{{ + state: state.next, + output: string(char), + store: store, + }} +} +func (state TransducerCopyAnyState) accepting(store Store) []string { + return nil +} + +type TransducerAST interface { + compileWithNext(next TransducerState) TransducerState +} + +type TransducerASTConcat struct { + first, second TransducerAST +} +func (ast TransducerASTConcat) compileWithNext(next TransducerState) TransducerState { + return ast.first.compileWithNext(ast.second.compileWithNext(next)) +} +func (ast TransducerASTConcat) String() string { + return fmt.Sprintf("(%v)(%v)", ast.first, ast.second) +} + +type TransducerASTStore struct { + match RegexAST + slot rune +} +func (ast TransducerASTStore) compileWithNext(next TransducerState) TransducerState { + return TransducerStoreState { + match: ast.match.compileWith(RegexNoneState{}), + slot: ast.slot, + next: next, + } +} +func (ast TransducerASTStore) String() string { + return fmt.Sprintf("$%c(%v)", ast.slot, ast.match) +} + +type TransducerASTMaximise struct { + content TransducerAST +} +func (ast TransducerASTMaximise) compileWithNext(next TransducerState) TransducerState { + state := &TransducerGroupState { + nil, + next, + } + state.first = ast.content.compileWithNext(state) + return state +} +func (ast TransducerASTMaximise) String() string { + return fmt.Sprintf("(%v)*", ast.content) +} + +type TransducerASTMinimise struct { + content TransducerAST +} +func (ast TransducerASTMinimise) compileWithNext(next TransducerState) TransducerState { + state := &TransducerGroupState { + next, + nil, + } + state.second = ast.content.compileWithNext(state) + return state +} +func (ast TransducerASTMinimise) String() string { + return fmt.Sprintf("(%v)-", ast.content) +} + +type TransducerASTRepeat struct { + content TransducerAST + min, max int +} +func (ast TransducerASTRepeat) compileWithNext(next TransducerState) TransducerState { + for i := ast.min; i < ast.max; i += 1 { + next = TransducerGroupState { + ast.content.compileWithNext(next), + next, + } + } + for i := 0; i < ast.min; i += 1 { + next = ast.content.compileWithNext(next) + } + return next +} + +type TransducerASTCopyRune rune +func (ast TransducerASTCopyRune) compileWithNext(next TransducerState) TransducerState { + return TransducerCopyRuneState{ + rune: rune(ast), + next: next, + } +} + +type TransducerASTCopyAny struct {} +func (ast TransducerASTCopyAny) compileWithNext(next TransducerState) TransducerState { + return TransducerCopyAnyState{next} +} +func (ast TransducerASTCopyAny) String() string { + return "." +} + +type TransducerASTOutput struct { + replacement []TransducerOutput +} +func (ast TransducerASTOutput) compileWithNext(next TransducerState) TransducerState { + return TransducerOutputState{ + content: ast.replacement, + next: next, + } +} + +func compileTransducer(transducerAst TransducerAST) TransducerState { + return transducerAst.compileWithNext(TransducerNoneState{}) +} + +type TransducerStatePair struct { + store Store + state TransducerState + output string +} +func (pair TransducerStatePair) eat(char rune) []TransducerStatePair { + states := pair.state.eat(pair.store, char) + for i := range states { + states[i].output = pair.output + states[i].output + } + return states +} +func (pair TransducerStatePair) accepting() []string { + return pair.state.accepting(pair.store) +} + +func runTransducer(transducer TransducerState, input string) (output string, err bool) { + states := []TransducerStatePair{{ + state: transducer, + output: "", + store: make(Store), + }} + for _, char := range input { + var newStates []TransducerStatePair + for _, state := range states { + newStates = append(newStates, state.eat(char)...) + } + states = newStates + } + for _, state := range states { + outputEnds := state.accepting() + for _, outputEnd := range outputEnds { + return state.output + outputEnd, false + } + } + return "", true +} + func main() { + if len(os.Args) != 3 { + panic("Expected: program [input] [subex]") + } + input := os.Args[1] + program := os.Args[2] + ast := parse(program) + transducer := compileTransducer(ast) + output, err := runTransducer(transducer, input) + if err { + output = input + } + fmt.Println(output) +} + +func mainISH() { quiet := false var input string hasInput := false -- cgit v1.2.3