path: root/main/lex.go
author Charlie Stanton <charlie@shtanton.xyz> 2022-08-26 11:51:46 +0100
committer Charlie Stanton <charlie@shtanton.xyz> 2022-08-26 11:51:46 +0100
commit ce5c224211a94bfd4c898b51d15febdf2ed9d6f2 (patch)
tree 8d1c9db463d9c1793bd3aad2b6875a22d4add90c /main/lex.go
parent ececdecdaf6c6f6295d31a92f0663d703e7760dd (diff)
download stred-go-ce5c224211a94bfd4c898b51d15febdf2ed9d6f2.tar
Refactors some stuff and adds lexing and parsing
Diffstat (limited to 'main/lex.go')
-rw-r--r-- main/lex.go 224
1 file changed, 224 insertions, 0 deletions
diff --git a/main/lex.go b/main/lex.go
new file mode 100644
index 0000000..6977f8a
--- /dev/null
+++ b/main/lex.go
@@ -0,0 +1,224 @@
+package main
+
+import (
+ "fmt"
+ "strings"
+ "unicode/utf8"
+)
+
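+// stateFunc is one state of the lexer's state machine; it returns the next state, or nil when lexing is finished.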
+type stateFunc func(*lexer) stateFunc
+
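+// lexer scans an input string, tracking the bounds of the token being built and emitting tokens on tokenStream.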
+type lexer struct {
+ input string
+ start int
+ pos int
+ width int
+ tokenStream chan Token
+}
+
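+// run drives the state machine from lexCommand until a state returns nil, then closes the token stream.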
+func (l *lexer) run() {
+ for state := lexCommand; state != nil; {
+ state = state(l)
+ }
+ close(l.tokenStream)
+}
+
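+// emit sends the text between start and pos as a token of type t, then moves start up to pos.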
+func (l *lexer) emit(t TokenType) {
+ l.tokenStream <- Token{
+ typ: t,
+ val: l.input[l.start:l.pos],
+ }
+ l.start = l.pos
+}
+
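+// errorf emits a TokenErr carrying the formatted message and returns nil to stop the state machine.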
+func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
+ l.tokenStream <- Token{
+ typ: TokenErr,
+ val: fmt.Sprintf(format, args...),
+ }
+ return nil
+}
+
+const eof rune = -1
+
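+// next consumes and returns the next rune in the input, or eof when the input is exhausted.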
+func (l *lexer) next() rune {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return eof
+ }
+ var r rune
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ l.pos += l.width
+ return r
+}
+
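+// backup steps back over the last rune read; it must only be called once per call to next.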
+func (l *lexer) backup() {
+ l.pos -= l.width
+}
+
+func (l *lexer) ignore() {
+ l.start = l.pos
+}
+
+func (l *lexer) reset() {
+ l.pos = l.start
+}
+
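+// peek returns the next rune without consuming it and without disturbing the stored rune width.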
+func (l *lexer) peek() rune {
+ w := l.width
+ r := l.next()
+ l.backup()
+ l.width = w
+ return r
+}
+
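+// accept consumes the next rune if it appears in valid, reporting whether it did.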
+func (l *lexer) accept(valid string) bool {
+ if strings.IndexRune(valid, l.next()) >= 0 {
+ return true
+ }
+ l.backup()
+ return false
+}
+
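+// acceptAll consumes a run of runes drawn from valid.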
+func (l *lexer) acceptAll(valid string) {
+ for strings.IndexRune(valid, l.next()) >= 0 {}
+ l.backup()
+}
+
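+// acceptPassing consumes the next rune if it satisfies the valid predicate, reporting whether it did.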
+func (l *lexer) acceptPassing(valid func(rune) bool) bool {
+ if valid(l.next()) {
+ return true
+ }
+ l.backup()
+ return false
+}
+
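+// acceptAllPassing consumes a run of runes satisfying the valid predicate.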
+func (l *lexer) acceptAllPassing(valid func(rune) bool) {
+ for valid(l.next()) {}
+ l.backup()
+}
+
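+// TokenType identifies the kind of a lexed token.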
+type TokenType int
+
+const (
+ TokenErr TokenType = iota // Lexing error
+ TokenEOF // end of file
+ TokenSemicolon // ;
+ TokenLParen // (
+ TokenRParen // )
+ TokenLBrace // {
+ TokenRBrace // }
+ TokenLBrack // [
+ TokenRBrack // ]
+ TokenCommand // A command character
+ TokenHash // #
+ TokenAt // @
+ TokenDot // .
+ TokenAst // *
+ TokenPatternStringIndex // A string index in a pattern
+ TokenPatternIntegerIndex // An integer index in a pattern
+)
+
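+// Token is a single lexed token: its type and the literal text it covers.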
+type Token struct {
+ typ TokenType
+ val string
+}
+
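+// String renders a token for debugging, truncating long values.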
+func (t Token) String() string {
+ switch t.typ {
+ case TokenEOF:
+ return "EOF"
+ case TokenErr:
+ return t.val
+ }
+ if len(t.val) > 10 {
+ return fmt.Sprintf("%.10q...", t.val)
+ }
+ return fmt.Sprintf("%q", t.val)
+}
+
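+// Lex starts lexing input in a new goroutine and returns the channel on which tokens are delivered.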
+func Lex(input string) chan Token {
+ l := &lexer{
+ input: input,
+ tokenStream: make(chan Token),
+ }
+ go l.run()
+ return l.tokenStream
+}
+
+const (
+ whitespace string = " \t"
+ whitespaceNewlines string = " \t\r\n"
+)
+
+func isAlpha(r rune) bool {
+ return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
+}
+func isDigit(r rune) bool {
+ return '0' <= r && r <= '9'
+}
+func isAlphaNumeric(r rune) bool {
+ return isAlpha(r) || isDigit(r)
+}
+func isStringIndexChar(r rune) bool {
+ return isAlphaNumeric(r) || r == '_' || r == '-'
+}
+
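+// lexCommand skips leading whitespace and lexes a single command character or piece of punctuation, dispatching to the pattern-index states after '#' and '@'.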
+func lexCommand(l *lexer) stateFunc {
+ l.acceptAll(whitespace)
+ l.ignore()
+ if l.peek() == eof {
+ l.emit(TokenEOF)
+ return nil
+ }
+ r := l.next()
+ switch r {
+ case '#':
+ l.emit(TokenHash)
+ return lexPatternStringIndex
+ case '@':
+ l.emit(TokenAt)
+ return lexPatternIntegerIndex
+ case '.':
+ l.emit(TokenDot)
+ return lexCommand
+ case '*':
+ l.emit(TokenAst)
+ return lexCommand
+ case '{':
+ l.emit(TokenLBrace)
+ return lexCommand
+ case '}':
+ l.emit(TokenRBrace)
+ return lexCommandEnd
+ }
+ if isAlpha(r) {
+ l.emit(TokenCommand)
+ return lexCommandEnd
+ }
+ return l.errorf("Expected command found something else")
+}
+
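+// lexPatternStringIndex lexes the string index that follows a '#'.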
+func lexPatternStringIndex(l *lexer) stateFunc {
+ l.acceptAllPassing(isStringIndexChar)
+ l.emit(TokenPatternStringIndex)
+ return lexCommand
+}
+
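+// lexPatternIntegerIndex lexes the digits that follow an '@'.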
+func lexPatternIntegerIndex(l *lexer) stateFunc {
+ l.acceptAllPassing(isDigit)
+ l.emit(TokenPatternIntegerIndex)
+ return lexCommand
+}
+
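+// lexCommandEnd expects either a ';' before the next command or the end of the input.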
+func lexCommandEnd(l *lexer) stateFunc {
+ if l.peek() == eof {
+ l.emit(TokenEOF)
+ return nil
+ }
+ if l.accept(";") {
+ l.emit(TokenSemicolon)
+ return lexCommand
+ }
+ return l.errorf("Expected ; found something else")
+}
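
As a quick illustration of the API this commit adds (not part of the diff above): a minimal, hypothetical driver that feeds a made-up program string to Lex and prints each token as it arrives. Because Lex runs the state machine in its own goroutine and run closes the channel after emitting TokenEOF or a TokenErr, the caller simply ranges over the channel until it is closed. The function name and the input string are illustrative only, assuming the sketch sits alongside the code above in package main.

	// Hypothetical sketch, not part of this commit: drive the lexer and print each token.
	func exampleLex() {
		for tok := range Lex("#foo a;@0 d") { // made-up input, not necessarily a valid stred program
			fmt.Println(tok) // printed via Token's String method
		}
	}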