package main

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

// stateFunc is one state of the lexer's state machine. It consumes some input
// from the lexer and returns the next state to run, or nil to stop.
type stateFunc func(*lexer) stateFunc

// lexer holds the scanning state for a single input string.
type lexer struct {
	input       string     // the complete text being lexed
	start       int        // byte offset where the pending (not yet emitted) token begins
	pos         int        // byte offset of the next rune to read
	width       int        // byte width of the rune most recently returned by next (0 at end of input)
	tokenStream chan Token // channel on which tokens are delivered
}

// run drives the state machine, starting at lexCommand, until a state returns
// nil, and then closes the token channel.
func (l *lexer) run() {
	for state := lexCommand; state != nil; {
		state = state(l)
	}
	close(l.tokenStream)
}

// emit sends the pending text input[start:pos] as a token of type t and
// starts a new pending token at the current position.
func (l *lexer) emit(t TokenType) {
	l.tokenStream <- Token{
		typ: t,
		val: l.input[l.start:l.pos],
	}
	l.start = l.pos
}

// errorf sends a TokenErr token whose value is the formatted message and
// returns nil, so that returning its result from a state stops the machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	l.tokenStream <- Token{
		typ: TokenErr,
		val: fmt.Sprintf(format, args...),
	}
	return nil
}

// eof is the sentinel returned by next once the input is exhausted.
const eof rune = -1

// next decodes and consumes the rune at the current position and returns it.
// At end of input it returns eof and records a width of 0, which makes a
// following backup a no-op.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	var r rune
	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += l.width
	return r
}

// backup steps back over the rune most recently returned by next. Only a
// single width is remembered, so it undoes at most one call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// ignore discards the pending text by moving start up to the current position.
func (l *lexer) ignore() {
	l.start = l.pos
}

// reset rewinds the current position to the start of the pending token.
func (l *lexer) reset() {
	l.pos = l.start
}

// expect consumes the runes of valid in order and reports whether all of them
// matched. On the first mismatch only the mismatching rune is un-read: any
// prefix that already matched stays consumed.
func (l *lexer) expect(valid string) bool {
	for _, r := range valid {
		if l.next() != r {
			l.backup()
			return false
		}
	}
	return true
}

// peek returns the next rune without consuming it, leaving both pos and the
// remembered width as they were.
func (l *lexer) peek() rune {
	w := l.width
	r := l.next()
	l.backup()
	l.width = w
	return r
}

// accept consumes the next rune if it occurs in valid and reports whether it
// did so.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptAll consumes a (possibly empty) run of runes that occur in valid.
func (l *lexer) acceptAll(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}

// acceptPassing consumes the next rune if valid returns true for it and
// reports whether it did so. valid is also called with eof at end of input.
func (l *lexer) acceptPassing(valid func(rune) bool) bool {
	if valid(l.next()) {
		return true
	}
	l.backup()
	return false
}

// acceptAllPassing consumes a (possibly empty) run of runes for which valid
// returns true. valid is also called with eof at end of input and must return
// false for it, otherwise the loop never terminates.
func (l *lexer) acceptAllPassing(valid func(rune) bool) {
	for valid(l.next()) {
	}
	l.backup()
}

// TokenType identifies the kind of a Token.
type TokenType int

const (
	TokenErr                 TokenType = iota // Lexing error
	TokenEOF                                  // end of file
	TokenLBrace                               // {
	TokenRBrace                               // }
	TokenCommand                              // A command character
	TokenSubstituteDelimiter                  // usually / but could be something else
	TokenSubex                                // A subex
	TokenLabel                                // A label
)

// Token is a single lexical element: its type and the text it covers (or the
// error message for TokenErr).
type Token struct {
	typ TokenType
	val string
}

// String renders the token for display: "EOF" for TokenEOF, the bare message
// for TokenErr, and otherwise the quoted value, cut to 10 characters followed
// by "..." when the value is longer than 10 bytes.
func (t Token) String() string {
	switch t.typ {
	case TokenEOF:
		return "EOF"
	case TokenErr:
		return t.val
	}
	if len(t.val) > 10 {
		return fmt.Sprintf("%.10q...", t.val)
	}
	return fmt.Sprintf("%q", t.val)
}

// Lex starts lexing input in a new goroutine and returns the channel on which
// the tokens arrive. The channel is unbuffered and is closed once lexing stops.
func Lex(input string) chan Token {
	l := &lexer{
		input:       input,
		tokenStream: make(chan Token),
	}
	go l.run()
	return l.tokenStream
}

const (
	whitespace         string = " \t"
	whitespaceNewlines string = " \t\r\n"
)

// isAlpha reports whether r is an ASCII letter, a-z or A-Z inclusive.
func isAlpha(r rune) bool {
	// Both bounds are inclusive; a strict upper bound would wrongly reject 'z'.
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
}

// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}

// isAlphaNumeric reports whether r is an ASCII letter or decimal digit.
func isAlphaNumeric(r rune) bool {
	return isAlpha(r) || isDigit(r)
}

// isStringIndexChar reports whether r is an ASCII letter, a decimal digit,
// '_' or '-'.
func isStringIndexChar(r rune) bool {
	return isAlphaNumeric(r) || r == '_' || r == '-'
}

// lexCommand is the initial state. It skips leading spaces and tabs, then
// reads one rune and emits the matching token: EOF, a brace, or a command
// character. Commands that take further input hand over to lexSubstitution
// or lexLabel; anything that is not a recognised rune or an ASCII letter is
// reported as an error.
func lexCommand(l *lexer) stateFunc {
	l.acceptAll(whitespace)
	l.ignore()
	r := l.next()
	switch r {
	case eof:
		l.emit(TokenEOF)
		return nil
	case '{':
		l.emit(TokenLBrace)
		return lexCommand
	case '}':
		l.emit(TokenRBrace)
		return lexCommand
	case 's', 'S', 'M':
		// These commands are always followed by a delimited subex.
		l.emit(TokenCommand)
		return lexSubstitution
	case 'x', 'X', 'y', 'Y', 'z', 'Z', 'n', 'N':
		// These commands take a subex only when directly followed by '/'.
		l.emit(TokenCommand)
		if l.peek() == '/' {
			return lexSubstitution
		}
		return lexCommand
	case ':', 'b':
		l.emit(TokenCommand)
		return lexLabel
	}
	if isAlpha(r) {
		l.emit(TokenCommand)
		return lexCommand
	}
	return l.errorf("Expected command found something else")
}

// lexSubstitution lexes a delimited subex. The first rune read is taken as
// the delimiter and emitted; everything up to the next occurrence of that
// rune is emitted as a TokenSubex, followed by the closing delimiter. Hitting
// end of input before either delimiter is an error.
func lexSubstitution(l *lexer) stateFunc {
	delimiter := l.next()
	if delimiter == eof {
		return l.errorf("Missing subex in substitution command")
	}
	l.emit(TokenSubstituteDelimiter)
	for {
		switch l.next() {
		case delimiter:
			// Step back so the subex token excludes the closing delimiter,
			// then re-read the delimiter and emit it as its own token.
			l.backup()
			l.emit(TokenSubex)
			l.next()
			l.emit(TokenSubstituteDelimiter)
			return lexCommand
		case eof:
			return l.errorf("Missing closing substitution delimiter")
		}
	}
}

// lexLabel consumes exactly one rune and emits it as a TokenLabel.
// NOTE(review): at end of input nothing is consumed and an empty label is
// emitted — confirm whether that should be an error instead.
func lexLabel(l *lexer) stateFunc {
	l.next()
	l.emit(TokenLabel)
	return lexCommand
}