<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
path: root/main
diff options
context:
space:
mode:
author Charlie Stanton <charlie@shtanton.xyz> 2022-08-26 11:51:46 +0100
committer Charlie Stanton <charlie@shtanton.xyz> 2022-08-26 11:51:46 +0100
commit ce5c224211a94bfd4c898b51d15febdf2ed9d6f2 (patch)
tree 8d1c9db463d9c1793bd3aad2b6875a22d4add90c /main
parent ececdecdaf6c6f6295d31a92f0663d703e7760dd (diff)
download stred-go-ce5c224211a94bfd4c898b51d15febdf2ed9d6f2.tar
Refactors some stuff and adds lexing and parsing
Diffstat (limited to 'main')
-rw-r--r-- main/command.go 84
-rw-r--r-- main/filter.go 52
-rw-r--r-- main/json.go 5
-rw-r--r-- main/lex.go 224
-rw-r--r-- main/main.go 119
-rw-r--r-- main/parse.go 145
-rw-r--r-- main/pathfilter.go 31
-rw-r--r-- main/pathfilterast.go 56
8 files changed, 593 insertions, 123 deletions
diff --git a/main/command.go b/main/command.go
index 560d3c3..bad5b1e 100644
--- a/main/command.go
+++ b/main/command.go
@@ -2,24 +2,26 @@ package main
type PrintValueCommand struct {}
func (cmd PrintValueCommand) exec(state *ProgramState) {
- state.out <- state.space
+ for _, item := range state.space {
+ state.out <- item
+ }
}
type ToggleTerminalCommand struct {}
func (cmd ToggleTerminalCommand) exec(state *ProgramState) {
- terminal, isTerminal := state.space.value.(TerminalValue)
- if !isTerminal {
- return
+ toggled := map[TerminalValue]TerminalValue {
+ ArrayBegin: MapBegin,
+ ArrayEnd: MapEnd,
+ MapBegin: ArrayBegin,
+ MapEnd: ArrayEnd,
}
- switch terminal {
- case ArrayBegin:
- state.space.value = MapBegin
- case ArrayEnd:
- state.space.value = MapEnd
- case MapBegin:
- state.space.value = ArrayBegin
- case MapEnd:
- state.space.value = ArrayEnd
+
+ for i := range state.space {
+ terminal, isTerminal := state.space[i].value.(TerminalValue)
+ if !isTerminal {
+ continue
+ }
+ state.space[i].value = toggled[terminal]
}
}
@@ -28,11 +30,63 @@ type FilteredCommand struct {
command Command
}
func (cmd FilteredCommand) exec(state *ProgramState) {
- if cmd.filter.exec(state) {
- cmd.command.exec(state)
+ for _, item := range state.space {
+ if cmd.filter.exec(item) {
+ cmd.command.exec(state)
+ return
+ }
+ }
+}
+
+// SequenceCommand runs a fixed list of commands one after another.
+type SequenceCommand struct {
+	commands []Command
+}
+
+// exec executes every contained command, in order, against the same state.
+func (cmd SequenceCommand) exec(state *ProgramState) {
+	for i := range cmd.commands {
+		cmd.commands[i].exec(state)
+	}
+}
+
+type AppendCommand struct {
+ values []WalkValue
+}
+func (cmd AppendCommand) exec(state *ProgramState) {
+ for _, value := range cmd.values {
+ state.space = append(state.space, WalkItem {
+ path: nil,
+ value: value,
+ })
}
}
+// PrependCommand inserts literal values at the front of the working space.
+type PrependCommand struct {
+	values []WalkValue
+}
+
+// exec wraps each configured value in a WalkItem with a nil path and places
+// those items, in order, ahead of whatever state.space already holds.
+func (cmd PrependCommand) exec(state *ProgramState) {
+	var prefix []WalkItem
+	for i := range cmd.values {
+		prefix = append(prefix, WalkItem{value: cmd.values[i], path: nil})
+	}
+	state.space = append(prefix, state.space...)
+}
+
+// PrintLiteralsCommand emits a fixed list of items regardless of the space.
+type PrintLiteralsCommand struct {
+	items []WalkItem
+}
+
+// exec sends each configured item, in order, to state.out.
+func (cmd PrintLiteralsCommand) exec(state *ProgramState) {
+	for i := 0; i < len(cmd.items); i++ {
+		state.out <- cmd.items[i]
+	}
+}
+
+// DeleteAllCommand empties the working space.
+type DeleteAllCommand struct{}
+
+// exec drops every item currently held in state.space.
+func (cmd DeleteAllCommand) exec(state *ProgramState) {
+	state.space = []WalkItem(nil)
+}
+
type Command interface {
exec(*ProgramState)
} \ No newline at end of file
diff --git a/main/filter.go b/main/filter.go
index 95e6d82..662fa7b 100644
--- a/main/filter.go
+++ b/main/filter.go
@@ -3,10 +3,10 @@ package main
type PathFilter struct {
initial PathFilterState
}
-func (filter PathFilter) exec(state *ProgramState) bool {
+func (filter PathFilter) exec(space WalkItem) bool {
pathFilterState := make(map[PathFilterState]struct{})
pathFilterState[filter.initial] = struct{}{}
- for _, segment := range state.space.path {
+ for _, segment := range space.path {
nextPathFilterState := make(map[PathFilterState]struct{})
for curState := range pathFilterState {
for nextState := range curState.eat(segment) {
@@ -23,12 +23,58 @@ func (filter PathFilter) exec(state *ProgramState) bool {
return false
}
+// MapTerminalFilter matches the terminals that delimit a map.
+type MapTerminalFilter struct{}
+
+// exec reports whether the item's value is the MapBegin or MapEnd terminal.
+func (filter MapTerminalFilter) exec(space WalkItem) bool {
+	if terminal, ok := space.value.(TerminalValue); ok {
+		switch terminal {
+		case MapBegin, MapEnd:
+			return true
+		}
+	}
+	return false
+}
+
+// NonTerminalFilter matches every item whose value is not a TerminalValue.
+type NonTerminalFilter struct{}
+
+// exec reports whether the item's value fails the TerminalValue type check.
+func (filter NonTerminalFilter) exec(space WalkItem) bool {
+	if _, isTerminal := space.value.(TerminalValue); isTerminal {
+		return false
+	}
+	return true
+}
+
type RangeFilter struct {
start Filter
end Filter
active bool
}
+// exec reports whether the item falls inside the range delimited by the start
+// and end filters, updating filter.active as the range opens and closes.
+func (filter *RangeFilter) exec(space WalkItem) bool {
+	if !filter.active {
+		// Outside a range: only the start filter is consulted, and a match
+		// both opens the range and accepts this item.
+		filter.active = filter.start.exec(space)
+		return filter.active
+	}
+	// Inside a range: the item is always accepted; an end match merely
+	// closes the range for the items that follow.
+	if filter.end.exec(space) {
+		filter.active = false
+	}
+	return true
+}
+
+// BeginTerminalFilter matches the terminals that open an array or a map.
+type BeginTerminalFilter struct{}
+
+// exec reports whether the item's value is ArrayBegin or MapBegin.
+func (filter BeginTerminalFilter) exec(space WalkItem) bool {
+	if terminal, ok := space.value.(TerminalValue); ok {
+		switch terminal {
+		case ArrayBegin, MapBegin:
+			return true
+		}
+	}
+	return false
+}
+
+// EndTerminalFilter matches the terminals that close an array or a map.
+type EndTerminalFilter struct{}
+
+// exec reports whether the item's value is ArrayEnd or MapEnd.
+func (filter EndTerminalFilter) exec(space WalkItem) bool {
+	if terminal, ok := space.value.(TerminalValue); ok {
+		switch terminal {
+		case ArrayEnd, MapEnd:
+			return true
+		}
+	}
+	return false
+}
type Filter interface {
- exec(*ProgramState) bool
+ exec(WalkItem) bool
} \ No newline at end of file
diff --git a/main/json.go b/main/json.go
index 66ca5d5..77c3733 100644
--- a/main/json.go
+++ b/main/json.go
@@ -6,11 +6,6 @@ import (
"fmt"
)
-type WalkItem struct {
- value WalkValue
- path Path
-}
-
type WalkItemStream struct {
channel chan WalkItem
rewinds []WalkItem
diff --git a/main/lex.go b/main/lex.go
new file mode 100644
index 0000000..6977f8a
--- /dev/null
+++ b/main/lex.go
@@ -0,0 +1,224 @@
+package main
+
+import (
+ "fmt"
+ "strings"
+ "unicode/utf8"
+)
+
+// stateFunc is one lexer state: it consumes input through the lexer and
+// returns the state to run next, or nil to stop.
+type stateFunc func(*lexer) stateFunc
+
+// lexer tracks the scan position over a program string and the channel that
+// emitted tokens are sent on.
+type lexer struct {
+	input             string     // text being scanned
+	start, pos, width int        // start of the pending token, current offset, size of the last rune read by next
+	tokenStream       chan Token // destination for emitted tokens
+}
+
+// run drives the state machine from lexCommand until a state returns nil,
+// then closes the token stream.
+func (l *lexer) run() {
+	var state stateFunc = lexCommand
+	for state != nil {
+		state = state(l)
+	}
+	close(l.tokenStream)
+}
+
+// emit sends a token of type t whose text is the input between start and pos,
+// then moves start up to pos so the next token begins there.
+func (l *lexer) emit(t TokenType) {
+	text := l.input[l.start:l.pos]
+	l.tokenStream <- Token{typ: t, val: text}
+	l.start = l.pos
+}
+
+// errorf sends a TokenErr token carrying the formatted message and returns a
+// nil state, which makes run stop.
+func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
+	message := fmt.Sprintf(format, args...)
+	l.tokenStream <- Token{typ: TokenErr, val: message}
+	return nil
+}
+
+// eof is the sentinel rune next returns once the input is exhausted.
+const eof rune = -1
+
+// next decodes the rune at pos, advances pos past it and records its byte
+// width. At the end of the input it returns eof and records a width of 0.
+func (l *lexer) next() rune {
+	if l.pos < len(l.input) {
+		r, size := utf8.DecodeRuneInString(l.input[l.pos:])
+		l.width = size
+		l.pos += size
+		return r
+	}
+	l.width = 0
+	return eof
+}
+
+// backup moves pos back by width, i.e. by the size next recorded for the rune
+// it read most recently (0 when that read hit eof).
+func (l *lexer) backup() {
+ l.pos -= l.width
+}
+
+// ignore discards the input consumed so far for the pending token by moving
+// start up to pos without emitting anything.
+func (l *lexer) ignore() {
+ l.start = l.pos
+}
+
+// reset rewinds pos to start, un-consuming everything read since the last
+// emit or ignore.
+func (l *lexer) reset() {
+ l.pos = l.start
+}
+
+// peek returns the rune at pos (or eof) without consuming it; pos and width
+// are left exactly as they were.
+func (l *lexer) peek() rune {
+	if l.pos >= len(l.input) {
+		return eof
+	}
+	r, _ := utf8.DecodeRuneInString(l.input[l.pos:])
+	return r
+}
+
+// accept consumes the next rune if it occurs in valid and reports whether it
+// did; a rune that does not match is put back.
+func (l *lexer) accept(valid string) bool {
+	if !strings.ContainsRune(valid, l.next()) {
+		l.backup()
+		return false
+	}
+	return true
+}
+
+// acceptAll consumes runes for as long as they occur in valid, stopping in
+// front of the first rune that does not.
+func (l *lexer) acceptAll(valid string) {
+	for {
+		if !strings.ContainsRune(valid, l.next()) {
+			break
+		}
+	}
+	l.backup()
+}
+
+// acceptPassing consumes the next rune if valid approves it and reports
+// whether it did; a rejected rune is put back.
+func (l *lexer) acceptPassing(valid func(rune) bool) bool {
+	ok := valid(l.next())
+	if !ok {
+		l.backup()
+	}
+	return ok
+}
+
+// acceptAllPassing consumes runes for as long as valid approves them,
+// stopping in front of the first rejected rune.
+func (l *lexer) acceptAllPassing(valid func(rune) bool) {
+	for {
+		if !valid(l.next()) {
+			break
+		}
+	}
+	l.backup()
+}
+
+// TokenType identifies the kind of a Token produced by the lexer.
+type TokenType int
+
+// The values are assigned by iota, so the order of this list is significant.
+const (
+ TokenErr TokenType = iota // Lexing error (val holds the message); also the zero value
+ TokenEOF // end of file
+ TokenSemicolon // ;
+ TokenLParen // (
+ TokenRParen // )
+ TokenLBrace // {
+ TokenRBrace // }
+ TokenLBrack // [
+ TokenRBrack // ]
+ TokenCommand // A command character
+ TokenHash // #
+ TokenAt // @
+ TokenDot // .
+ TokenAst // *
+ TokenPatternStringIndex // A string index in a pattern
+ TokenPatternIntegerIndex // An integer index in a pattern
+)
+
+// Token is one lexical unit: its type plus the source text it covers (or,
+// for TokenErr, the error message).
+type Token struct {
+	typ TokenType
+	val string
+}
+
+// String renders the token for diagnostics: "EOF" for end of input, the bare
+// message for errors, and otherwise the quoted text, truncated when it is
+// longer than 10 bytes.
+func (t Token) String() string {
+	if t.typ == TokenEOF {
+		return "EOF"
+	}
+	if t.typ == TokenErr {
+		return t.val
+	}
+	if len(t.val) <= 10 {
+		return fmt.Sprintf("%q", t.val)
+	}
+	return fmt.Sprintf("%.10q...", t.val)
+}
+
+// Lex starts lexing input in a new goroutine and returns the unbuffered
+// channel the tokens arrive on; the lexer closes the channel when it stops.
+func Lex(input string) chan Token {
+	tokens := make(chan Token)
+	l := &lexer{input: input, tokenStream: tokens}
+	go l.run()
+	return tokens
+}
+
+// Character sets treated as blank space: whitespace covers space and tab,
+// whitespaceNewlines additionally covers carriage return and line feed.
+const (
+	whitespace         string = " \t"
+	whitespaceNewlines string = whitespace + "\r\n"
+)
+
+// isAlpha reports whether r is an ASCII letter, a-z or A-Z inclusive.
+func isAlpha(r rune) bool {
+	// Both bounds are inclusive so that 'z' counts as a letter too.
+	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
+}
+// isDigit reports whether r is an ASCII decimal digit.
+func isDigit(r rune) bool {
+	return r >= '0' && r <= '9'
+}
+// isAlphaNumeric reports whether r satisfies isAlpha or isDigit.
+func isAlphaNumeric(r rune) bool {
+	if isDigit(r) {
+		return true
+	}
+	return isAlpha(r)
+}
+// isStringIndexChar reports whether r may appear in a string index of a
+// pattern: anything isAlphaNumeric accepts, plus '_' and '-'.
+func isStringIndexChar(r rune) bool {
+	switch r {
+	case '_', '-':
+		return true
+	}
+	return isAlphaNumeric(r)
+}
+
+// lexCommand is the lexer's initial state. It skips spaces and tabs, then
+// turns the next rune into a pattern token, a brace, or a command token and
+// selects the state that follows. End of input yields TokenEOF and stops the
+// lexer; any other rune yields an error token.
+func lexCommand(l *lexer) stateFunc {
+	l.acceptAll(whitespace)
+	l.ignore()
+	if l.peek() == eof {
+		l.emit(TokenEOF)
+		return nil
+	}
+	switch r := l.next(); {
+	case r == '#':
+		l.emit(TokenHash)
+		return lexPatternStringIndex
+	case r == '@':
+		l.emit(TokenAt)
+		return lexPatternIntegerIndex
+	case r == '.':
+		l.emit(TokenDot)
+		return lexCommand
+	case r == '*':
+		l.emit(TokenAst)
+		return lexCommand
+	case r == '{':
+		l.emit(TokenLBrace)
+		return lexCommand
+	case r == '}':
+		l.emit(TokenRBrace)
+		return lexCommandEnd
+	case isAlpha(r):
+		l.emit(TokenCommand)
+		return lexCommandEnd
+	}
+	return l.errorf("Expected command found something else")
+}
+
+// lexPatternStringIndex consumes the run of string-index characters at the
+// current position (possibly none) and emits it as one token.
+func lexPatternStringIndex(l *lexer) stateFunc {
+	for l.acceptPassing(isStringIndexChar) {
+	}
+	l.emit(TokenPatternStringIndex)
+	return lexCommand
+}
+
+// lexPatternIntegerIndex consumes the run of digits at the current position
+// (possibly none) and emits it as one token.
+func lexPatternIntegerIndex(l *lexer) stateFunc {
+	for l.acceptPassing(isDigit) {
+	}
+	l.emit(TokenPatternIntegerIndex)
+	return lexCommand
+}
+
+// lexCommandEnd expects whatever may follow a command: end of input, which
+// emits TokenEOF and stops the lexer, or a ';', which emits TokenSemicolon
+// and returns to lexCommand. Anything else yields an error token.
+func lexCommandEnd(l *lexer) stateFunc {
+	switch {
+	case l.peek() == eof:
+		l.emit(TokenEOF)
+		return nil
+	case l.accept(";"):
+		l.emit(TokenSemicolon)
+		return lexCommand
+	default:
+		return l.errorf("Expected ; found something else")
+	}
+}
diff --git a/main/main.go b/main/main.go
index 31e46c6..5503fb1 100644
--- a/main/main.go
+++ b/main/main.go
@@ -1,7 +1,6 @@
package main
import (
- "fmt"
"os"
"bufio"
)
@@ -23,117 +22,67 @@ type ValueString string
type WalkValue interface {}
+// WalkItem couples a value with a path.
+// NOTE(review): path is presumably where value sits in the input document — confirm against the JSON walker.
+type WalkItem struct {
+ value WalkValue
+ path Path
+}
+
type Program []Command
type ProgramState struct {
- space WalkItem
+ space []WalkItem
in chan WalkItem
out chan WalkItem
program []Command
}
-type StringSegmentPathFilterAST struct {
- index string
-}
-func (ast StringSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState {
- return StringSegmentPathFilter {
- index: ast.index,
- next: next,
+func main() {
+ quiet := false
+ var input string
+ hasInput := false
+
+ for i := 1; i < len(os.Args); i += 1 {
+ switch os.Args[i] {
+ case "-n":
+ quiet = true
+ continue
+ }
+ if i < len(os.Args) - 1 {
+ panic("Unexpected arguments after program")
+ }
+ input = os.Args[i]
+ hasInput = true
}
-}
-
-type RepeatPathFilterAST struct {
- content PathFilterAST
-}
-func (ast RepeatPathFilterAST) compileWith(next PathFilterState) PathFilterState {
- nextGroup := &GroupPathFilter{}
- repeatStart := ast.content.compileWith(nextGroup)
- nextGroup.filters = []PathFilterState{next, repeatStart}
- return nextGroup
-}
-
-type SequencePathFilterAST struct {
- sequence []PathFilterAST
-}
-func (ast SequencePathFilterAST) compileWith(next PathFilterState) PathFilterState {
- for i := len(ast.sequence) - 1; i >= 0; i -= 1 {
- next = ast.sequence[i].compileWith(next)
+ if !hasInput {
+ panic("Missing program")
}
- return next
-}
-type AnySegmentPathFilterAST struct {}
-func (ast AnySegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState {
- return AnySegmentPathFilter{next: next}
-}
+ tokens := Lex(input)
+ program := Parse(tokens)
-type PathFilterAST interface {
- compileWith(PathFilterState) PathFilterState
-}
-
-func compilePathFilterAST(ast PathFilterAST) PathFilter {
- return PathFilter{
- initial: ast.compileWith(NonePathFilter{}),
- }
-}
-
-func main() {
- if len(os.Args) < 2 {
- fmt.Println("Missing program arg")
- return
- }
- //input := os.Args[1]
- //tokens := Lex(input)
- //program := Parse(tokens)
-
stdin := bufio.NewReader(os.Stdin)
dataStream := Json(stdin)
-
- var allRemainingPathFilter AnySegmentPathFilter
- {
- g := GroupPathFilter {
- filters: []PathFilterState{NonePathFilter{}},
- }
- allRemainingPathFilter = AnySegmentPathFilter {
- next: PathFilterState(&g),
- }
- g.filters = append(g.filters, PathFilterState(&allRemainingPathFilter))
- }
state := ProgramState {
in: dataStream,
out: make(chan WalkItem),
- program: []Command {
- FilteredCommand {
- filter: compilePathFilterAST(
- StringSegmentPathFilterAST {"people"},
- ),
- command: PrintValueCommand{},
- },
- FilteredCommand {
- filter: compilePathFilterAST(
- SequencePathFilterAST {
- []PathFilterAST{
- StringSegmentPathFilterAST {"people"},
- AnySegmentPathFilterAST{},
- StringSegmentPathFilterAST {"age"},
- },
- },
- ),
- command: PrintValueCommand{},
- },
- },
+ program: program,
}
go func () {
for walkItem := range dataStream {
- state.space = walkItem
+ state.space = []WalkItem{walkItem}
for _, cmd := range state.program {
cmd.exec(&state)
}
+ if !quiet {
+ for _, item := range state.space {
+ state.out <- item
+ }
+ }
}
close(state.out)
}()
JsonOut(state.out)
-}
+} \ No newline at end of file
diff --git a/main/parse.go b/main/parse.go
new file mode 100644
index 0000000..e876010
--- /dev/null
+++ b/main/parse.go
@@ -0,0 +1,145 @@
+package main
+
+import (
+ "strings"
+ "strconv"
+ "fmt"
+)
+
+// parser reads tokens from tokenStream. rewinds is a stack of tokens that
+// were pushed back with rewind; next pops from its end before touching the
+// stream again.
+type parser struct {
+ tokenStream chan Token
+ rewinds []Token
+}
+// next returns the most recently rewound token if there is one, otherwise it
+// receives the next token from the stream.
+func (p *parser) next() Token {
+	if n := len(p.rewinds); n > 0 {
+		token := p.rewinds[n-1]
+		p.rewinds = p.rewinds[:n-1]
+		return token
+	}
+	return <-p.tokenStream
+}
+// rewind pushes token back so that the following call to next returns it.
+func (p *parser) rewind(token Token) {
+ p.rewinds = append(p.rewinds, token)
+}
+// peek returns the token next would return without consuming it.
+func (p *parser) peek() Token {
+	if n := len(p.rewinds); n > 0 {
+		return p.rewinds[n-1]
+	}
+	// Nothing buffered: pull one token from the stream and keep it for next.
+	token := <-p.tokenStream
+	p.rewinds = append(p.rewinds, token)
+	return token
+}
+
+// TODO: make a pratt parser
+
+// parsePathPatternFilter reads pattern tokens (#name, @index, ., *) until it
+// meets a token that is not part of a pattern, which it pushes back, and
+// returns the segments as a sequence. It panics on a malformed pattern.
+func (p *parser) parsePathPatternFilter() PathFilterAST {
+	var segments []PathFilterAST
+	for {
+		token := p.next()
+		switch token.typ {
+		case TokenHash:
+			name := p.next()
+			if name.typ != TokenPatternStringIndex {
+				panic("Expected string index after # in pattern")
+			}
+			segments = append(segments, StringSegmentPathFilterAST{name.val})
+		case TokenAt:
+			digits := p.next()
+			if digits.typ != TokenPatternIntegerIndex {
+				panic("Expected integer index after @ in pattern")
+			}
+			index, err := strconv.Atoi(digits.val)
+			if err != nil {
+				panic("Expected integer index after @ in pattern")
+			}
+			segments = append(segments, IntegerSegmentPathFilterAST{index})
+		case TokenDot:
+			segments = append(segments, AnySegmentPathFilterAST{})
+		case TokenAst:
+			// * applies to the segment before it, so one must already exist.
+			last := len(segments) - 1
+			if last < 0 {
+				panic("Invalid * in pattern, * must go after something")
+			}
+			segments[last] = RepeatPathFilterAST{segments[last]}
+		default:
+			p.rewind(token)
+			return SequencePathFilterAST{segments}
+		}
+	}
+}
+
+// TODO: should only return a single filter
+
+// parseFilter parses one path pattern and returns it compiled, wrapped in a
+// slice. It panics with "Missing filter" when the next token cannot start a
+// pattern.
+func (p *parser) parseFilter() []Filter {
+	token := p.next()
+	if token.typ != TokenHash && token.typ != TokenAt && token.typ != TokenDot {
+		panic("Missing filter")
+	}
+	p.rewind(token)
+	filterAst := p.parsePathPatternFilter()
+	filters := []Filter{compilePathFilterAST(filterAst)}
+	// Take the token after the pattern and push it straight back, leaving it
+	// for the caller.
+	p.rewind(p.next())
+	return filters
+}
+
+// parseBasicCommand maps a command character to its command: 'p' prints the
+// space, 'd' deletes it. Any other character panics.
+func (p *parser) parseBasicCommand(commandChar rune) Command {
+	if commandChar == 'p' {
+		return PrintValueCommand{}
+	}
+	if commandChar == 'd' {
+		return DeleteAllCommand{}
+	}
+	panic("Invalid command")
+}
+
+// parseCommand parses a single command. A token that starts a pattern
+// (#, @ or .) introduces a filter followed by the command it guards; the
+// result is that command wrapped in a FilteredCommand per filter. A command
+// token is handed to parseBasicCommand. Any other token panics, with the
+// offending token included in the panic message.
+func (p *parser) parseCommand() Command {
+	token := p.next()
+	switch token.typ {
+	case TokenHash, TokenAt, TokenDot:
+		p.rewind(token)
+		filters := p.parseFilter()
+		command := p.parseCommand()
+		for _, filter := range filters {
+			command = FilteredCommand{
+				filter:  filter,
+				command: command,
+			}
+		}
+		return command
+	case TokenCommand:
+		commandChar, _, err := strings.NewReader(token.val).ReadRune()
+		if err != nil {
+			panic("Error reading a command character!?")
+		}
+		return p.parseBasicCommand(commandChar)
+	default:
+		// Report the token through the panic itself rather than printing it
+		// to stdout first, so the whole diagnostic arrives in one place.
+		// NOTE(review): stdout presumably also carries the program's JSON
+		// output — confirm against JsonOut.
+		panic(fmt.Sprintf("Invalid token %v, expected command", token))
+	}
+}
+
+// parseCommands parses commands separated by ';' until it reaches the end of
+// the input or a closing brace. It panics if a command is followed by
+// anything other than ';', the end of the input or a closing brace.
+func (p *parser) parseCommands() []Command {
+	var commands []Command
+	ends := func(t Token) bool {
+		return t.typ == TokenEOF || t.typ == TokenRBrace
+	}
+	for !ends(p.peek()) {
+		commands = append(commands, p.parseCommand())
+		separator := p.next()
+		if ends(separator) {
+			break
+		}
+		if separator.typ != TokenSemicolon {
+			panic("Expected ; after command")
+		}
+	}
+	return commands
+}
+
+// Parse consumes the token stream and returns the commands it describes.
+func Parse(tokens chan Token) []Command {
+	p := &parser{tokenStream: tokens}
+	return p.parseCommands()
+}
diff --git a/main/pathfilter.go b/main/pathfilter.go
index 7b6c64f..b64872e 100644
--- a/main/pathfilter.go
+++ b/main/pathfilter.go
@@ -1,20 +1,5 @@
package main
-type MapTerminalFilter struct {}
-func (filter MapTerminalFilter) exec(state *ProgramState) bool {
- terminal, isTerminal := state.space.value.(TerminalValue)
- if !isTerminal {
- return false
- }
- return terminal == MapBegin || terminal == MapEnd
-}
-
-type NonTerminalFilter struct {}
-func (filter NonTerminalFilter) exec(state *ProgramState) bool {
- _, isTerminal := state.space.value.(TerminalValue)
- return !isTerminal
-}
-
type AnySegmentPathFilter struct {
next PathFilterState
}
@@ -72,6 +57,22 @@ func (filter StringSegmentPathFilter) accept() bool {
return false
}
+// IntegerSegmentPathFilter is a path-filter state that advances to next only
+// on a path segment equal to index.
+type IntegerSegmentPathFilter struct {
+	index int
+	next  PathFilterState
+}
+
+// eat returns the set holding next when segment is an int equal to index,
+// and an empty set otherwise.
+func (filter IntegerSegmentPathFilter) eat(segment PathSegment) map[PathFilterState]struct{} {
+	res := make(map[PathFilterState]struct{})
+	if i, isInteger := segment.(int); isInteger && i == filter.index {
+		res[filter.next] = struct{}{}
+	}
+	return res
+}
+
+// accept always reports false.
+func (filter IntegerSegmentPathFilter) accept() bool {
+	return false
+}
+
type PathFilterState interface {
eat(PathSegment) map[PathFilterState]struct{}
accept() bool
diff --git a/main/pathfilterast.go b/main/pathfilterast.go
new file mode 100644
index 0000000..c2ddc7f
--- /dev/null
+++ b/main/pathfilterast.go
@@ -0,0 +1,56 @@
+package main
+
+// StringSegmentPathFilterAST is the pattern node for a single string index.
+type StringSegmentPathFilterAST struct {
+	index string
+}
+
+// compileWith builds the StringSegmentPathFilter for this index that
+// continues with next.
+func (ast StringSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState {
+	var compiled StringSegmentPathFilter
+	compiled.index = ast.index
+	compiled.next = next
+	return compiled
+}
+
+// IntegerSegmentPathFilterAST is the pattern node for a single integer index.
+type IntegerSegmentPathFilterAST struct {
+	index int
+}
+
+// compileWith builds the IntegerSegmentPathFilter for this index that
+// continues with next.
+func (ast IntegerSegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState {
+	var compiled IntegerSegmentPathFilter
+	compiled.index = ast.index
+	compiled.next = next
+	return compiled
+}
+
+// RepeatPathFilterAST is the pattern node for a repeated sub-pattern.
+type RepeatPathFilterAST struct {
+	content PathFilterAST
+}
+
+// compileWith returns a group whose alternatives are next and the compiled
+// content; the content is compiled to continue with that same group, which
+// is what forms the loop.
+func (ast RepeatPathFilterAST) compileWith(next PathFilterState) PathFilterState {
+	group := new(GroupPathFilter)
+	// The content needs the group as its continuation, so the group has to
+	// exist before its filters can be filled in.
+	body := ast.content.compileWith(group)
+	group.filters = []PathFilterState{next, body}
+	return group
+}
+
+// SequencePathFilterAST is the pattern node for sub-patterns matched one
+// after another.
+type SequencePathFilterAST struct {
+	sequence []PathFilterAST
+}
+
+// compileWith compiles the sequence back to front, so that each element
+// continues with the compiled form of the element after it and the last one
+// continues with next.
+func (ast SequencePathFilterAST) compileWith(next PathFilterState) PathFilterState {
+	state := next
+	for i := len(ast.sequence); i > 0; i-- {
+		state = ast.sequence[i-1].compileWith(state)
+	}
+	return state
+}
+
+// AnySegmentPathFilterAST is the pattern node written as '.'.
+type AnySegmentPathFilterAST struct{}
+
+// compileWith builds an AnySegmentPathFilter that continues with next.
+func (ast AnySegmentPathFilterAST) compileWith(next PathFilterState) PathFilterState {
+	var compiled AnySegmentPathFilter
+	compiled.next = next
+	return compiled
+}
+
+// PathFilterAST is a node of a parsed path pattern. compileWith turns the
+// node into a PathFilterState, given the state to continue with.
+type PathFilterAST interface {
+ compileWith(PathFilterState) PathFilterState
+}
+
+// compilePathFilterAST compiles a pattern into a PathFilter, using
+// NonePathFilter as the state that follows the end of the pattern.
+func compilePathFilterAST(ast PathFilterAST) PathFilter {
+	initial := ast.compileWith(NonePathFilter{})
+	return PathFilter{initial: initial}
+}