From 96812b9ea732cc7ae26efce4568c19aec0000abc Mon Sep 17 00:00:00 2001
From: Charlie Stanton
Date: Wed, 21 Sep 2022 19:37:02 +0100
Subject: Adds some new commands

---
 main/lex.go | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 222 insertions(+), 7 deletions(-)

(limited to 'main/lex.go')

diff --git a/main/lex.go b/main/lex.go
index 91231ed..0daf2d1 100644
--- a/main/lex.go
+++ b/main/lex.go
@@ -64,6 +64,22 @@ func (l *lexer) reset() {
 	l.pos = l.start
 }
 
+// expect consumes the runes of valid from the input, in order, and reports
+// whether all of them matched. On a mismatch the lexer is rewound to where it
+// was before the call, so nothing is consumed.
+func (l *lexer) expect(valid string) bool {
+	pos, width := l.pos, l.width
+	for _, r := range valid {
+		if l.next() != r {
+			// backup() presumably only undoes the latest next(), so restore the
+			// saved state rather than leaving the earlier matches consumed.
+			l.pos, l.width = pos, width
+			return false
+		}
+	}
+	return true
+}
+
 func (l *lexer) peek() rune {
 	w := l.width
 	r := l.next()
@@ -116,6 +132,7 @@ const (
 	TokenDot // .
 	TokenAst // *
 	TokenBar // |
+	TokenOr // ||
 	TokenAnd // &&
 	TokenHat // ^
 	TokenDollar // $
@@ -123,6 +140,17 @@ const (
 	TokenHatDollar // ^$
 	TokenExclamation // !
 	TokenTilde // ~
+	TokenDoubleQuote // "
+	TokenStringLiteral // A string literal, not including the " either side
+	TokenNullLiteral // null
+	TokenTrueLiteral // true
+	TokenFalseLiteral // false
+	TokenColon // :
+	TokenComma // ,
+	TokenSubstituteDelimiter // usually / but could be something else
+	TokenSubstitutePlaceholder // \1, \2 etc.
+	TokenTerminalLiteral // One of {, }, [, ]
+	TokenNumberLiteral // A number literal
 	TokenPatternStringIndex // A string index in a pattern
 	TokenPatternIntegerIndex // An integer index in a pattern
 )
@@ -183,10 +211,12 @@ func lexCommand(l *lexer) stateFunc {
 	switch r {
 	case '#':
 		l.emit(TokenHash)
-		return lexPatternStringIndex
+		lexPatternStringIndex(l)
+		return lexCommand
 	case '@':
 		l.emit(TokenAt)
-		return lexPatternIntegerIndex
+		lexPatternIntegerIndex(l)
+		return lexCommand
 	case '.':
 		l.emit(TokenDot)
 		return lexCommand
@@ -194,7 +224,17 @@ func lexCommand(l *lexer) stateFunc {
 		l.emit(TokenAst)
 		return lexCommand
 	case '|':
-		l.emit(TokenBar)
+		if l.accept("|") {
+			l.emit(TokenOr)
+		} else {
+			l.emit(TokenBar)
+		}
+		return lexCommand
+	case '[':
+		l.emit(TokenLBrack)
+		return lexCommand
+	case ']':
+		l.emit(TokenRBrack)
 		return lexCommand
 	case '(':
 		l.emit(TokenLParen)
@@ -232,6 +272,12 @@ func lexCommand(l *lexer) stateFunc {
 	case '~':
 		l.emit(TokenTilde)
 		return lexCommand
+	case 'i':
+		l.emit(TokenCommand)
+		return lexMultipleLiterals
+	case 'S':
+		l.emit(TokenCommand)
+		return lexBigSubstitution
 	}
 	if isAlpha(r) {
 		l.emit(TokenCommand)
@@ -240,16 +286,181 @@ func lexCommand(l *lexer) stateFunc {
 	return l.errorf("Expected command found something else")
 }
 
-func lexPatternStringIndex(l *lexer) stateFunc {
+// lexBigSubstitution lexes the argument of the S command: a non-alphanumeric
+// delimiter, a pattern section, the delimiter again, a replacement section and
+// a final delimiter. Recognised pattern characters are emitted as their own
+// tokens and \<digit> in the replacement as TokenSubstitutePlaceholder.
+//
+// NOTE(review): characters that match no case are not emitted on their own;
+// presumably they end up in the text of the next emitted token - confirm
+// whether that is intended.
+func lexBigSubstitution(l *lexer) stateFunc {
+	delimiter := l.next()
+	if delimiter == eof || isAlphaNumeric(delimiter) {
+		return l.errorf("Invalid delimiter for big substitution")
+	}
+	l.emit(TokenSubstituteDelimiter)
+pattern:
+	for {
+		switch l.next() {
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break pattern
+		case eof:
+			// Input ended before the closing delimiter; without this case the
+			// loop would never terminate.
+			return l.errorf("Unterminated pattern in big substitution")
+		case '#':
+			l.emit(TokenHash)
+			lexPatternStringIndex(l)
+		case '@':
+			l.emit(TokenAt)
+			lexPatternIntegerIndex(l)
+		case '.':
+			l.emit(TokenDot)
+		case '*':
+			l.emit(TokenAst)
+		case '|':
+			l.emit(TokenBar)
+		case '[':
+			l.emit(TokenLBrack)
+		case ']':
+			l.emit(TokenRBrack)
+		case '?':
+			l.emit(TokenQuestion)
+		case ':':
+			l.emit(TokenColon)
+		case ',':
+			l.emit(TokenComma)
+		}
+	}
+replacement:
+	for {
+		switch l.next() {
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break replacement
+		case eof:
+			// Same guard as above for an unterminated replacement.
+			return l.errorf("Unterminated replacement in big substitution")
+		case '\\':
+			if !l.acceptPassing(isDigit) {
+				return l.errorf("Expected digit after \\")
+			}
+			l.emit(TokenSubstitutePlaceholder)
+		}
+	}
+	// TODO: No clue where I was going with this
+	return lexCommand
+}
+
+// lexMultipleLiterals lexes the arguments of the i command: any number of
+// literals, ':' and ',' with optional whitespace between them. It stops at
+// ';' or end of input and hands over to lexCommandEnd.
+func lexMultipleLiterals(l *lexer) stateFunc {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	switch l.next() {
+	case ';', eof:
+		l.backup()
+		return lexCommandEnd
+	case ':':
+		l.emit(TokenColon)
+		return lexMultipleLiterals
+	case ',':
+		l.emit(TokenComma)
+		return lexMultipleLiterals
+	}
+	// The rune just read starts a literal; un-read it so that
+	// lexSingleLiteral, which reads its own first rune, does not skip it.
+	l.backup()
+	if err := lexSingleLiteral(l); err != "" {
+		return l.errorf(err)
+	}
+	return lexMultipleLiterals
+}
+
+// lexSingleLiteral skips leading whitespace and lexes one literal: a
+// double-quoted string, null, true, false, a number, or one of the terminals
+// {, }, [ and ]. It returns an error message, or "" on success.
+func lexSingleLiteral(l *lexer) string {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	r := l.next()
+	switch r {
+	case '"':
+		l.emit(TokenDoubleQuote)
+		if !lexStringLiteral(l) {
+			return "Expected closing \""
+		}
+	case 'n':
+		if !l.expect("ull") {
+			return "Invalid literal, expected null"
+		}
+		l.emit(TokenNullLiteral)
+	case 't':
+		if !l.expect("rue") {
+			return "Invalid literal, expected true"
+		}
+		l.emit(TokenTrueLiteral)
+	case 'f':
+		if !l.expect("alse") {
+			return "Invalid literal, expected false"
+		}
+		l.emit(TokenFalseLiteral)
+	case '{', '}', '[', ']':
+		l.emit(TokenTerminalLiteral)
+	default:
+		if !isDigit(r) {
+			return "Invalid literal"
+		}
+		lexNumberLiteral(l)
+	}
+	return ""
+}
+
+// lexNumberLiteral lexes the remainder of a number literal: further digits and
+// an optional fractional part. The caller has already read the first digit.
+func lexNumberLiteral(l *lexer) {
+	l.acceptAllPassing(isDigit)
+	if l.accept(".") {
+		l.acceptAllPassing(isDigit)
+	}
+	l.emit(TokenNumberLiteral)
+}
+
+// lexStringLiteral lexes the rest of a string literal after its opening ",
+// emitting the contents as TokenStringLiteral and the closing " as
+// TokenDoubleQuote. It reports false if the input ends first.
+// TODO: escape characters
+func lexStringLiteral(l *lexer) bool {
+	for {
+		switch l.next() {
+		case '"':
+			// Step back so the closing quote is not part of the string token.
+			l.backup()
+			l.emit(TokenStringLiteral)
+			l.next()
+			l.emit(TokenDoubleQuote)
+			return true
+		case eof:
+			return false
+		}
+	}
+}
+
+// lexPatternStringIndex emits the run of string index characters that follows
+// as a TokenPatternStringIndex.
+func lexPatternStringIndex(l *lexer) {
 	l.acceptAllPassing(isStringIndexChar)
 	l.emit(TokenPatternStringIndex)
-	return lexCommand
 }
 
-func lexPatternIntegerIndex(l *lexer) stateFunc {
+// lexPatternIntegerIndex emits the run of digits that follows as a
+// TokenPatternIntegerIndex.
+func lexPatternIntegerIndex(l *lexer) {
 	l.acceptAllPassing(isDigit)
 	l.emit(TokenPatternIntegerIndex)
-	return lexCommand
 }
 
 func lexCommandEnd(l *lexer) stateFunc {
@@ -261,5 +472,9 @@ func lexCommandEnd(l *lexer) stateFunc {
 		l.emit(TokenSemicolon)
 		return lexCommand
 	}
+	if l.accept("}") {
+		l.emit(TokenRBrace)
+		return lexCommandEnd
+	}
 	return l.errorf("Expected ; found something else")
 }
-- 
cgit v1.2.3