<- Back to shtanton's homepage
aboutsummaryrefslogtreecommitdiff
path: root/main/lex.go
diff options
context:
space:
mode:
Diffstat (limited to 'main/lex.go')
-rw-r--r-- main/lex.go 194
1 files changed, 187 insertions, 7 deletions
diff --git a/main/lex.go b/main/lex.go
index 91231ed..0daf2d1 100644
--- a/main/lex.go
+++ b/main/lex.go
@@ -64,6 +64,16 @@ func (l *lexer) reset() {
l.pos = l.start
}
+// expect reads one rune from the input for every rune in valid, in order, and
+// reports whether all of them matched. On the first mismatch it backs up once
+// (un-reading only the mismatching rune) and returns false; runes that had
+// already matched are not handed back here.
+func (l *lexer) expect(valid string) bool {
+	for _, want := range valid {
+		if got := l.next(); got != want {
+			l.backup()
+			return false
+		}
+	}
+	return true
+}
+
func (l *lexer) peek() rune {
w := l.width
r := l.next()
@@ -116,6 +126,7 @@ const (
TokenDot // .
TokenAst // *
TokenBar // |
+ TokenOr // ||
TokenAnd // &&
TokenHat // ^
TokenDollar // $
@@ -123,6 +134,17 @@ const (
TokenHatDollar // ^$
TokenExclamation // !
TokenTilde // ~
+ TokenDoubleQuote // "
+ TokenStringLiteral // A string literal, not including the " either side
+ TokenNullLiteral // null
+ TokenTrueLiteral // true
+ TokenFalseLiteral // false
+ TokenColon // :
+ TokenComma // ,
+ TokenSubstituteDelimiter // usually / but could be something else
+ TokenSubstitutePlaceholder // \1, \2 etc.
+ TokenTerminalLiteral // One of {, }, [, ]
+ TokenNumberLiteral // A number literal
TokenPatternStringIndex // A string index in a pattern
TokenPatternIntegerIndex // An integer index in a pattern
)
@@ -183,10 +205,12 @@ func lexCommand(l *lexer) stateFunc {
switch r {
case '#':
l.emit(TokenHash)
- return lexPatternStringIndex
+ lexPatternStringIndex(l)
+ return lexCommand
case '@':
l.emit(TokenAt)
- return lexPatternIntegerIndex
+ lexPatternIntegerIndex(l)
+ return lexCommand
case '.':
l.emit(TokenDot)
return lexCommand
@@ -194,7 +218,17 @@ func lexCommand(l *lexer) stateFunc {
l.emit(TokenAst)
return lexCommand
case '|':
- l.emit(TokenBar)
+ if l.accept("|") {
+ l.emit(TokenOr)
+ } else {
+ l.emit(TokenBar)
+ }
+ return lexCommand
+ case '[':
+ l.emit(TokenLBrack)
+ return lexCommand
+ case ']':
+ l.emit(TokenRBrack)
return lexCommand
case '(':
l.emit(TokenLParen)
@@ -232,6 +266,12 @@ func lexCommand(l *lexer) stateFunc {
case '~':
l.emit(TokenTilde)
return lexCommand
+ case 'i':
+ l.emit(TokenCommand)
+ return lexMultipleLiterals
+ case 'S':
+ l.emit(TokenCommand)
+ return lexBigSubstitution
}
if isAlpha(r) {
l.emit(TokenCommand)
@@ -240,16 +280,152 @@ func lexCommand(l *lexer) stateFunc {
return l.errorf("Expected command found something else")
}
-func lexPatternStringIndex(l *lexer) stateFunc {
+// lexBigSubstitution lexes the argument of the 'S' command. The first rune is
+// taken as the delimiter (it must not be eof or alphanumeric) and emitted as a
+// TokenSubstituteDelimiter. Two delimiter-terminated sections follow:
+//
+//  1. a pattern section, tokenised with the same pattern tokens lexCommand
+//     uses (#, @, ., *, |, [, ], ?) plus ':' and ',';
+//  2. a replacement section, in which a backslash followed by a digit is
+//     emitted as a TokenSubstitutePlaceholder.
+//
+// Each section's closing delimiter is emitted as a TokenSubstituteDelimiter.
+// Reaching eof before a closing delimiter is reported as an error; without
+// that check the loops would never see the delimiter and never terminate.
+//
+// NOTE(review): runes that match no case are not emitted on their own;
+// presumably they stay in the pending token text and end up in the next
+// emitted token - confirm against emit.
+func lexBigSubstitution(l *lexer) stateFunc {
+	delimiter := l.next()
+	if delimiter == eof || isAlphaNumeric(delimiter) {
+		return l.errorf("Invalid delimiter for big substitution")
+	}
+	l.emit(TokenSubstituteDelimiter)
+
+	// Pattern section: runs until the delimiter is seen again. The delimiter
+	// case is listed before the pattern runes so that a delimiter which is
+	// also a pattern rune (e.g. '#') still closes the section.
+pattern:
+	for {
+		r := l.next()
+		switch r {
+		case eof:
+			return l.errorf("Expected closing delimiter for big substitution pattern")
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break pattern
+		case '#':
+			l.emit(TokenHash)
+			lexPatternStringIndex(l)
+		case '@':
+			l.emit(TokenAt)
+			lexPatternIntegerIndex(l)
+		case '.':
+			l.emit(TokenDot)
+		case '*':
+			l.emit(TokenAst)
+		case '|':
+			l.emit(TokenBar)
+		case '[':
+			l.emit(TokenLBrack)
+		case ']':
+			l.emit(TokenRBrack)
+		case '?':
+			l.emit(TokenQuestion)
+		case ':':
+			l.emit(TokenColon)
+		case ',':
+			l.emit(TokenComma)
+		}
+	}
+
+	// Replacement section: runs until the delimiter is seen a third time.
+replacement:
+	for {
+		r := l.next()
+		switch r {
+		case eof:
+			return l.errorf("Expected closing delimiter for big substitution replacement")
+		case delimiter:
+			l.emit(TokenSubstituteDelimiter)
+			break replacement
+		case '\\':
+			if !l.acceptPassing(isDigit) {
+				return l.errorf("Expected digit after \\")
+			}
+			l.emit(TokenSubstitutePlaceholder)
+		}
+	}
+	// TODO (original author): the intended follow-up here is undecided; for
+	// now lexing simply continues with the next command.
+	return lexCommand
+}
+
+// lexMultipleLiterals is the state entered after the 'i' command. Each call
+// skips leading whitespace/newlines and then handles one item:
+//
+//   - ';' or eof: the rune is backed up and control passes to lexCommandEnd;
+//   - ':' or ',': emitted as TokenColon / TokenComma;
+//   - anything else: treated as the start of a literal and delegated to
+//     lexSingleLiteral, whose non-empty result is reported through errorf.
+//
+// It returns itself after every successfully lexed item so that any number of
+// literals and separators can follow the command.
+func lexMultipleLiterals(l *lexer) stateFunc {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	r := l.next()
+	switch r {
+	case ';', eof:
+		l.backup()
+		return lexCommandEnd
+	case ':':
+		l.emit(TokenColon)
+		return lexMultipleLiterals
+	case ',':
+		l.emit(TokenComma)
+		return lexMultipleLiterals
+	}
+	// r is the first rune of a literal. lexSingleLiteral discards pending
+	// input with ignore() and then reads its own first rune with next(), so r
+	// must be handed back first; otherwise the literal loses its first rune
+	// (e.g. "null" would be seen as "ull").
+	l.backup()
+	if err := lexSingleLiteral(l); err != "" {
+		return l.errorf(err)
+	}
+	return lexMultipleLiterals
+}
+
+// lexSingleLiteral skips leading whitespace/newlines, reads one rune and lexes
+// the literal that rune introduces:
+//
+//   - '"'            a string literal (opening quote, contents, closing quote);
+//   - 'n', 't', 'f'  the keywords null, true and false;
+//   - '{' '}' '[' ']' a single TokenTerminalLiteral;
+//   - a digit        a number literal.
+//
+// It returns the empty string on success and an error message otherwise.
+func lexSingleLiteral(l *lexer) string {
+	l.acceptAll(whitespaceNewlines)
+	l.ignore()
+	first := l.next()
+	switch {
+	case first == '"':
+		l.emit(TokenDoubleQuote)
+		if lexStringLiteral(l) {
+			return ""
+		}
+		return "Expected closing \""
+	case first == 'n':
+		if l.expect("ull") {
+			l.emit(TokenNullLiteral)
+			return ""
+		}
+		return "Invalid literal, expected null"
+	case first == 't':
+		if l.expect("rue") {
+			l.emit(TokenTrueLiteral)
+			return ""
+		}
+		return "Invalid literal, expected true"
+	case first == 'f':
+		if l.expect("alse") {
+			l.emit(TokenFalseLiteral)
+			return ""
+		}
+		return "Invalid literal, expected false"
+	case first == '{' || first == '}' || first == '[' || first == ']':
+		l.emit(TokenTerminalLiteral)
+		return ""
+	case isDigit(first):
+		lexNumberLiteral(l)
+		return ""
+	}
+	return "Invalid literal"
+}
+
+// lexNumberLiteral finishes a number literal whose first digit the caller has
+// already read. It accepts the remaining digits, then an optional '.' with
+// any digits after it, and emits the result as a TokenNumberLiteral.
+func lexNumberLiteral(l *lexer) {
+	l.acceptAllPassing(isDigit)
+	hasFraction := l.accept(".")
+	if hasFraction {
+		l.acceptAllPassing(isDigit)
+	}
+	l.emit(TokenNumberLiteral)
+}
+
+// lexStringLiteral reads runes up to the closing double quote. The contents
+// are emitted as a TokenStringLiteral (without the quote) and the quote itself
+// as a separate TokenDoubleQuote. It returns false if eof is reached before a
+// closing quote is found.
+//
+// TODO: escape characters
+func lexStringLiteral(l *lexer) bool {
+	for {
+		r := l.next()
+		if r == eof {
+			return false
+		}
+		if r != '"' {
+			continue
+		}
+		// Step back so the quote is not part of the string token, then
+		// re-read it and emit it on its own.
+		l.backup()
+		l.emit(TokenStringLiteral)
+		l.next()
+		l.emit(TokenDoubleQuote)
+		return true
+	}
+}
+
+// lexPatternStringIndex is called right after a '#' has been emitted as
+// TokenHash. It runs acceptAllPassing(isStringIndexChar) and emits the result
+// as a single TokenPatternStringIndex. After this change it is a plain helper
+// with no return value instead of a state function, so the caller chooses the
+// state that follows.
+func lexPatternStringIndex(l *lexer) {
l.acceptAllPassing(isStringIndexChar)
l.emit(TokenPatternStringIndex)
- return lexCommand
}
-func lexPatternIntegerIndex(l *lexer) stateFunc {
+// lexPatternIntegerIndex is called right after an '@' has been emitted as
+// TokenAt. It runs acceptAllPassing(isDigit) and emits the result as a single
+// TokenPatternIntegerIndex. After this change it is a plain helper with no
+// return value instead of a state function, so the caller chooses the state
+// that follows.
+func lexPatternIntegerIndex(l *lexer) {
l.acceptAllPassing(isDigit)
l.emit(TokenPatternIntegerIndex)
- return lexCommand
}
func lexCommandEnd(l *lexer) stateFunc {
@@ -261,5 +437,9 @@ func lexCommandEnd(l *lexer) stateFunc {
l.emit(TokenSemicolon)
return lexCommand
}
+ if l.accept("}") {
+ l.emit(TokenRBrace)
+ return lexCommandEnd
+ }
return l.errorf("Expected ; found something else")
}