<- Back to shtanton's homepage
about | summary | refs | log | tree | commit | diff
path: root/subex
diff options
context:
space:
mode:
author Charlie Stanton <charlie@shtanton.xyz> 2023-04-19 14:34:22 +0100
committer Charlie Stanton <charlie@shtanton.xyz> 2023-04-19 14:34:22 +0100
commit 10f847acc7087317b0fbe20b7cf3307a0fafab8a (patch)
tree 4abf2f4009fcac55013672e841b2f9d3a2b2fb52 /subex
parent 5089fe689f17a3489b6be76588b8fc7f93d70e55 (diff)
download stred-go-10f847acc7087317b0fbe20b7cf3307a0fafab8a.tar
Changes the parsing API for subex to make it more suitable for use as part of a larger program
Diffstat (limited to 'subex')
-rw-r--r-- subex/lex.go 16
-rw-r--r-- subex/main.go 7
-rw-r--r-- subex/parse.go 91
3 files changed, 60 insertions, 54 deletions
diff --git a/subex/lex.go b/subex/lex.go
index f020b23..74bf370 100644
--- a/subex/lex.go
+++ b/subex/lex.go
@@ -5,11 +5,11 @@ import (
)
const eof rune = -1
-type RuneReader struct {
+type StringRuneReader struct {
input string
pos, width int
}
-func (l *RuneReader) next() rune {
+func (l *StringRuneReader) Next() rune {
if l.pos >= len(l.input) {
l.width = 0
return eof
@@ -19,16 +19,6 @@ func (l *RuneReader) next() rune {
l.pos += l.width
return r
}
-func (l *RuneReader) accept(chars string) bool {
- r := l.next()
- for _, char := range chars {
- if char == r {
- return true
- }
- }
- l.rewind()
- return false
-}
-func (l *RuneReader) rewind() {
+func (l *StringRuneReader) Rewind() {
l.pos -= l.width
}
diff --git a/subex/main.go b/subex/main.go
index 091625b..9824f10 100644
--- a/subex/main.go
+++ b/subex/main.go
@@ -131,7 +131,12 @@ func Main() {
panic("Expected: program [subex]")
}
program := os.Args[1]
- ast := Parse(program)
+ reader := &StringRuneReader {
+ input: program,
+ pos: 0,
+ width: 0,
+ }
+ ast := Parse(reader)
transducer := CompileTransducer(ast)
stdin := bufio.NewReader(os.Stdin);
diff --git a/subex/parse.go b/subex/parse.go
index d6ef995..e6efc2e 100644
--- a/subex/parse.go
+++ b/subex/parse.go
@@ -4,8 +4,24 @@ import (
"main/walk"
)
-func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
- switch l.next() {
+type RuneReader interface {
+ Next() rune
+ Rewind()
+}
+
+func accept(l RuneReader, chars string) bool {
+ r := l.Next()
+ for _, char := range chars {
+ if char == r {
+ return true
+ }
+ }
+ l.Rewind()
+ return false
+}
+
+func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom {
+ switch l.Next() {
case '(':
return ifLeft
case ')':
@@ -16,7 +32,7 @@ func expectBracket(l *RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom
}
// Having just read termType, read in a bracket and return the corresponding Atom
-func parseTerminatorAtomLiteral(termType rune, l *RuneReader) walk.Atom {
+func parseTerminatorAtomLiteral(termType rune, l RuneReader) walk.Atom {
switch termType {
case '@':
return expectBracket(l, walk.ArrayBegin, walk.ArrayEnd)
@@ -34,41 +50,41 @@ func charIsDigit(c rune) bool {
}
// Parse a positive integer, reads digits 0-9 and stops at the first non-digit
-func parseInt(l *RuneReader) (output int) {
+func parseInt(l RuneReader) (output int) {
for {
- char := l.next()
+ char := l.Next()
if charIsDigit(char) {
output = output * 10 + int(char - '0')
} else {
break
}
}
- l.rewind()
+ l.Rewind()
return output
}
// Having just read {, read in and parse the range contents
-func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
+func parseRepeatRange(l RuneReader) (output []ConvexRange) {
loop: for {
var start, end int
- char := l.next()
- l.rewind()
+ char := l.Next()
+ l.Rewind()
if char == '-' {
start = -1
} else {
start = parseInt(l)
}
- switch l.next() {
+ switch l.Next() {
case ',':
output = append(output, ConvexRange{start, start})
continue loop
case '-':
- char := l.next()
+ char := l.Next()
if charIsDigit(char) {
- l.rewind()
+ l.Rewind()
end = parseInt(l)
} else {
- l.rewind()
+ l.Rewind()
end = -1
}
case '}':
@@ -77,7 +93,7 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
default:
panic("Invalid character in repeat specifier")
}
- switch l.next() {
+ switch l.Next() {
case ',':
output = append(output, ConvexRange{start, end})
continue loop
@@ -91,17 +107,17 @@ func parseRepeatRange(l *RuneReader) (output []ConvexRange) {
return output
}
-func parseReplacement(l *RuneReader) (output []OutputContent) {
+func parseReplacement(l RuneReader) (output []OutputContent) {
// TODO escaping
loop: for {
- r := l.next()
+ r := l.Next()
switch r {
case eof:
panic("Missing closing \"")
case '"':
break loop
case '$':
- slot := l.next()
+ slot := l.Next()
if slot == eof {
panic("Missing slot character")
}
@@ -116,13 +132,13 @@ func parseReplacement(l *RuneReader) (output []OutputContent) {
}
// Parse the contents of a range subex [] into a map
-func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
+func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom {
// TODO escaping
parts := make(map[walk.Atom]walk.Atom)
var froms []walk.Atom
var hasTo bool
for {
- fromsStart := l.next()
+ fromsStart := l.Next()
if fromsStart == ']' {
hasTo = false
break
@@ -136,10 +152,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
continue
}
}
- if l.accept("-") {
- fromsEnd := l.next()
+ if accept(l, "-") {
+ fromsEnd := l.Next()
if fromsEnd == ']' || fromsEnd == '=' {
- l.rewind()
+ l.Rewind()
fromsEnd = fromsStart
}
for i := fromsStart; i <= fromsEnd; i += 1 {
@@ -156,7 +172,7 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
var tos []walk.Atom
if hasTo {
for {
- tosStart := l.next()
+ tosStart := l.Next()
if tosStart == ']' {
break
} else {
@@ -166,10 +182,10 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
continue
}
}
- if l.accept("-") {
- tosEnd := l.next()
+ if accept(l, "-") {
+ tosEnd := l.Next()
if tosEnd == ']' {
- l.rewind()
+ l.Rewind()
tosEnd = tosStart
}
for i := tosStart; i <= tosEnd; i += 1 {
@@ -192,22 +208,22 @@ func parseRangeSubex(l *RuneReader) map[walk.Atom]walk.Atom {
return parts
}
-func parseSubex(l *RuneReader, minPower int) SubexAST {
+func parseSubex(l RuneReader, minPower int) SubexAST {
var lhs SubexAST
- r := l.next()
+ r := l.Next()
switch r {
case eof:
return nil
case '(':
lhs = parseSubex(l, 0)
- if !l.accept(")") {
+ if !accept(l, ")") {
panic("Missing matching )")
}
case '[':
rangeParts := parseRangeSubex(l)
lhs = SubexASTRange {rangeParts}
case ')', '|', ';', '{', '+', '$':
- l.rewind()
+ l.Rewind()
return nil
case '"':
replacement := parseReplacement(l)
@@ -227,7 +243,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
continue loop
}
}
- r := l.next()
+ r := l.Next()
switch {
case r == '{' && minPower <= 8:
lhs = SubexASTRepeat {
@@ -245,7 +261,7 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
case r == '!' && minPower <= 8:
lhs = SubexASTNot {lhs}
case r == '$' && minPower <= 8:
- slot := l.next()
+ slot := l.Next()
if slot == eof {
panic("Missing slot character")
}
@@ -269,18 +285,13 @@ func parseSubex(l *RuneReader, minPower int) SubexAST {
delimiter: rhs,
}
default:
- l.rewind()
+ l.Rewind()
break loop
}
}
return lhs
}
-func Parse(input string) SubexAST {
- l := RuneReader {
- input: input,
- pos: 0,
- width: 0,
- }
- return parseSubex(&l, 0)
+func Parse(l RuneReader) SubexAST {
+ return parseSubex(l, 0)
}