subex

Subex: Substitute Expressions, a tiny language for string operations
git clone http://shtanton.xyz/git/repo/subex
Log | Files | Refs

commit b2d1d965dee8cc2c1e063067d53a3c8e28a46d6c
parent ce2db2bc333ed938ec93d5ad0838f8cb720c4865
Author: Charlie Stanton <charlie@shtanton.xyz>
Date:   Sat, 24 Dec 2022 10:04:42 +0000

Adds the character range mapping syntax

Ranges of characters can be mapped with []

For example, capitalisation of a letter: [a-z=A-Z]
Caesar cipher shift of 1: [a-zA-Z=b-zaB-ZA]

Diffstat:
M main/parse.go      | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M main/subexast.go   | 10 ++++++++++
M main/subexstate.go | 20 ++++++++++++++++++++
3 files changed, 98 insertions(+), 0 deletions(-)

diff --git a/main/parse.go b/main/parse.go
@@ -21,6 +21,71 @@ func parseReplacement(l *RuneReader) (output []TransducerOutput) {
 	return output
 }
 
+func parseRangeSubex(l *RuneReader) map[rune]rune {
+	parts := make(map[rune]rune)
+	var froms []rune
+	var hasTo bool
+	for {
+		fromsStart := l.next()
+		if fromsStart == ']' {
+			hasTo = false
+			break
+		} else if fromsStart == '=' {
+			hasTo = true
+			break
+		}
+		var fromsEnd rune
+		if l.accept("-") {
+			fromsEnd = l.next()
+			if fromsEnd == ']' || fromsEnd == '=' {
+				l.rewind()
+				fromsEnd = fromsStart
+			}
+		} else {
+			fromsEnd = fromsStart
+		}
+		for i := fromsStart; i <= fromsEnd; i += 1 {
+			froms = append(froms, i)
+		}
+	}
+	if len(froms) == 0 {
+		panic("Missing from part of range expression")
+	}
+
+	var tos []rune
+	if hasTo {
+		for {
+			tosStart := l.next()
+			if tosStart == ']' {
+				break
+			}
+			var tosEnd rune
+			if l.accept("-") {
+				tosEnd = l.next()
+				if tosEnd == ']' {
+					l.rewind()
+					tosEnd = tosStart
+				}
+			} else {
+				tosEnd = tosStart
+			}
+			for i := tosStart; i <= tosEnd; i += 1 {
+				tos = append(tos, i)
+			}
+		}
+	} else {
+		tos = froms
+	}
+	if len(tos) == 0 {
+		panic("Missing to part of range expression")
+	}
+
+	for i, from := range froms {
+		parts[from] = tos[i % len(tos)]
+	}
+	return parts
+}
+
 func parseSubex(l *RuneReader, minPower int) SubexAST {
 	var lhs SubexAST
 	r := l.next()
@@ -32,6 +97,9 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
 			if !l.accept(")") {
 				panic("Missing matching )")
 			}
+		case '[':
+			rangeParts := parseRangeSubex(l)
+			lhs = SubexASTRange {rangeParts}
 		case ')', '*', '-', '|', '!', '?', ';':
 			l.rewind()
 			return nil
diff --git a/main/subexast.go b/main/subexast.go
@@ -151,3 +151,13 @@ func (ast SubexASTJoin) compileWith(next SubexState) SubexState {
 		next,
 	}
 }
+
+type SubexASTRange struct {
+	parts map[rune]rune
+}
+func (ast SubexASTRange) compileWith(next SubexState) SubexState {
+	return &SubexRangeState {
+		parts: ast.parts,
+		next: next,
+	}
+}
diff --git a/main/subexstate.go b/main/subexstate.go
@@ -123,3 +123,23 @@ func (state SubexCopyAnyState) eat(store Store, char rune) []SubexBranch {
 func (state SubexCopyAnyState) accepting(store Store) []string {
 	return nil
 }
+
+type SubexRangeState struct {
+	parts map[rune]rune
+	next SubexState
+}
+func (state SubexRangeState) eat(store Store, char rune) []SubexBranch {
+	out, exists := state.parts[char]
+	if !exists {
+		return nil
+	} else {
+		return []SubexBranch{{
+			state: state.next,
+			output: string(out),
+			store: store,
+		}}
+	}
+}
+func (state SubexRangeState) accepting(store Store) []string {
+	return nil
+}