From ce5c224211a94bfd4c898b51d15febdf2ed9d6f2 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Fri, 26 Aug 2022 11:51:46 +0100 Subject: Refactors some stuff and adds lexing and parsing --- main/lex.go | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 main/lex.go (limited to 'main/lex.go') diff --git a/main/lex.go b/main/lex.go new file mode 100644 index 0000000..6977f8a --- /dev/null +++ b/main/lex.go @@ -0,0 +1,224 @@ +package main + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +type stateFunc func(*lexer) stateFunc + +type lexer struct { + input string + start int + pos int + width int + tokenStream chan Token +} + +func (l *lexer) run() { + for state := lexCommand; state != nil; { + state = state(l) + } + close(l.tokenStream) +} + +func (l *lexer) emit(t TokenType) { + l.tokenStream <- Token{ + typ: t, + val: l.input[l.start:l.pos], + } + l.start = l.pos +} + +func (l *lexer) errorf(format string, args ...interface{}) stateFunc { + l.tokenStream <- Token{ + typ: TokenErr, + val: fmt.Sprintf(format, args...), + } + return nil +} + +const eof rune = -1 + +func (l *lexer) next() rune { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + var r rune + r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return r +} + +func (l *lexer) backup() { + l.pos -= l.width +} + +func (l *lexer) ignore() { + l.start = l.pos +} + +func (l *lexer) reset() { + l.pos = l.start +} + +func (l *lexer) peek() rune { + w := l.width + r := l.next() + l.backup() + l.width = w + return r +} + +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAll(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 {} + l.backup() +} + +func (l *lexer) acceptPassing(valid func(rune) bool) bool { + if valid(l.next()) { + return true + } + l.backup() + return false +} + +func (l *lexer) acceptAllPassing(valid func(rune) bool) { + for valid(l.next()) {} + l.backup() +} + +type TokenType int + +const ( + TokenErr TokenType = iota // Lexing error + TokenEOF // end of file + TokenSemicolon // ; + TokenLParen // ( + TokenRParen // ) + TokenLBrace // { + TokenRBrace // } + TokenLBrack // [ + TokenRBrack // ] + TokenCommand // A command character + TokenHash // # + TokenAt // @ + TokenDot // . + TokenAst // * + TokenPatternStringIndex // A string index in a pattern + TokenPatternIntegerIndex // An integer index in a pattern +) + +type Token struct { + typ TokenType + val string +} + +func (t Token) String() string { + switch t.typ { + case TokenEOF: + return "EOF" + case TokenErr: + return t.val + } + if len(t.val) > 10 { + return fmt.Sprintf("%.10q...", t.val) + } + return fmt.Sprintf("%q", t.val) +} + +func Lex(input string) chan Token { + l := &lexer{ + input: input, + tokenStream: make(chan Token), + } + go l.run() + return l.tokenStream +} + +const ( + whitespace string = " \t" + whitespaceNewlines string = " \t\r\n" +) + +func isAlpha(r rune) bool { + return ('a' <= r && r < 'z') || ('A' <= r && r <= 'Z') +} +func isDigit(r rune) bool { + return '0' <= r && r <= '9' +} +func isAlphaNumeric(r rune) bool { + return isAlpha(r) || isDigit(r) +} +func isStringIndexChar(r rune) bool { + return isAlphaNumeric(r) || r == '_' || r == '-' +} + +func lexCommand(l *lexer) stateFunc { + l.acceptAll(whitespace) + l.ignore() + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + r := l.next() + switch r { + case '#': + l.emit(TokenHash) + return lexPatternStringIndex + case '@': + l.emit(TokenAt) + return lexPatternIntegerIndex + case '.': + l.emit(TokenDot) + return lexCommand + case '*': + l.emit(TokenAst) + return lexCommand + case '{': + l.emit(TokenLBrace) + return lexCommand + case '}': + l.emit(TokenRBrace) + return lexCommandEnd + } + if isAlpha(r) { + l.emit(TokenCommand) + return lexCommandEnd + } + return l.errorf("Expected command found something else") +} + +func lexPatternStringIndex(l *lexer) stateFunc { + l.acceptAllPassing(isStringIndexChar) + l.emit(TokenPatternStringIndex) + return lexCommand +} + +func lexPatternIntegerIndex(l *lexer) stateFunc { + l.acceptAllPassing(isDigit) + l.emit(TokenPatternIntegerIndex) + return lexCommand +} + +func lexCommandEnd(l *lexer) stateFunc { + if l.peek() == eof { + l.emit(TokenEOF) + return nil + } + if l.accept(";") { + l.emit(TokenSemicolon) + return lexCommand + } + return l.errorf("Expected ; found something else") +} -- cgit v1.2.3