From b2d1d965dee8cc2c1e063067d53a3c8e28a46d6c Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Sat, 24 Dec 2022 10:04:42 +0000 Subject: Adds the character range mapping syntax Ranges of characters can be mapped with [] For example, capitalisation of a letter: [a-z=A-Z] Caesar cipher shift of 1: [a-zA-Z=b-zaB-ZA] --- main/parse.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ main/subexast.go | 10 ++++++++ main/subexstate.go | 20 ++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/main/parse.go b/main/parse.go index 59104c1..f155453 100644 --- a/main/parse.go +++ b/main/parse.go @@ -21,6 +21,71 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) { return output } +func parseRangeSubex(l *RuneReader) map[rune]rune { + parts := make(map[rune]rune) + var froms []rune + var hasTo bool + for { + fromsStart := l.next() + if fromsStart == ']' { + hasTo = false + break + } else if fromsStart == '=' { + hasTo = true + break + } + var fromsEnd rune + if l.accept("-") { + fromsEnd = l.next() + if fromsEnd == ']' || fromsEnd == '=' { + l.rewind() + fromsEnd = fromsStart + } + } else { + fromsEnd = fromsStart + } + for i := fromsStart; i <= fromsEnd; i += 1 { + froms = append(froms, i) + } + } + if len(froms) == 0 { + panic("Missing from part of range expression") + } + + var tos []rune + if hasTo { + for { + tosStart := l.next() + if tosStart == ']' { + break + } + var tosEnd rune + if l.accept("-") { + tosEnd = l.next() + if tosEnd == ']' { + l.rewind() + tosEnd = tosStart + } + } else { + tosEnd = tosStart + } + for i := tosStart; i <= tosEnd; i += 1 { + tos = append(tos, i) + } + } + } else { + tos = froms + } + if len(tos) == 0 { + panic("Missing to part of range expression") + } + + for i, from := range froms { + parts[from] = tos[i % len(tos)] + } + return parts +} + func parseSubex(l *RuneReader, minPower int) SubexAST { var lhs SubexAST r := l.next() @@ -32,6 +97,9 @@ func parseSubex(l *RuneReader, minPower int) SubexAST { if !l.accept(")") { panic("Missing matching )") } + case '[': + rangeParts := parseRangeSubex(l) + lhs = SubexASTRange {rangeParts} case ')', '*', '-', '|', '!', '?', ';': l.rewind() return nil diff --git a/main/subexast.go b/main/subexast.go index 040bc9d..aabdcd0 100644 --- a/main/subexast.go +++ b/main/subexast.go @@ -151,3 +151,13 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState { next, } } + +type SubexASTRange struct { + parts map[rune]rune +} +func (ast SubexASTRange) compileWith(next SubexState) SubexState { + return &SubexRangeState { + parts: ast.parts, + next: next, + } +} diff --git a/main/subexstate.go b/main/subexstate.go index 00b9e75..880be38 100644 --- a/main/subexstate.go +++ b/main/subexstate.go @@ -123,3 +123,23 @@ func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch { func (state SubexCopyAnyState) accepting(store Store) []string { return nil } + +type SubexRangeState struct { + parts map[rune]rune + next SubexState +} +func (state SubexRangeState) eat(store Store, char rune) []SubexBranch { + out, exists := state.parts[char] + if !exists { + return nil + } else { + return []SubexBranch{{ + state: state.next, + output: string(out), + store: store, + }} + } +} +func (state SubexRangeState) accepting(store Store) []string { + return nil +} -- cgit v1.2.3