lex.go (3791B)
package main

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

// stateFunc is one state of the lexer's state machine. It consumes some
// input from the lexer and returns the next state, or nil to stop lexing.
type stateFunc func(*lexer) stateFunc

// lexer holds the scanning state for a single input string.
type lexer struct {
	input       string     // the text being scanned
	start       int        // byte offset where the pending token begins
	pos         int        // byte offset of the next rune to read
	width       int        // byte width of the rune most recently read by next
	tokenStream chan Token // channel on which scanned tokens are delivered
}

// run drives the state machine, starting in lexCommand, until a state
// returns nil, and then closes the token channel.
func (l *lexer) run() {
	state := stateFunc(lexCommand)
	for state != nil {
		state = state(l)
	}
	close(l.tokenStream)
}

// emit sends the text between start and pos as a token of type t and moves
// start up to pos so the next token begins after it.
func (l *lexer) emit(t TokenType) {
	tok := Token{typ: t, val: l.input[l.start:l.pos]}
	l.tokenStream <- tok
	l.start = l.pos
}

// errorf sends a TokenErr whose value is the formatted message and returns
// nil, so a state can `return l.errorf(...)` to terminate the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
	msg := fmt.Sprintf(format, args...)
	l.tokenStream <- Token{typ: TokenErr, val: msg}
	return nil
}

// eof is the sentinel rune returned by next when the input is exhausted.
const eof rune = -1

// next decodes and consumes the rune at pos. At the end of the input it
// consumes nothing, records a width of zero and returns eof.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return r
}

// backup steps back over the rune most recently read by next. It relies on
// width, so it undoes exactly one call to next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// ignore discards the text consumed since the last emitted token.
func (l *lexer) ignore() {
	l.start = l.pos
}

// reset rewinds pos to the start of the pending token.
func (l *lexer) reset() {
	l.pos = l.start
}

// expect consumes the exact rune sequence valid and reports whether it was
// present. On a mismatch the lexer is restored to the position (and width)
// it had on entry, so a failed expect consumes nothing.
func (l *lexer) expect(valid string) bool {
	pos, width := l.pos, l.width
	for _, want := range valid {
		if l.next() != want {
			// Undo every rune consumed so far, not only the mismatching one.
			l.pos, l.width = pos, width
			return false
		}
	}
	return true
}

// peek returns the next rune without consuming it. The previous width is
// preserved so a backup issued after peek still undoes the last real next.
func (l *lexer) peek() rune {
	saved := l.width
	r := l.next()
	l.backup()
	l.width = saved
	return r
}

// accept consumes the next rune if it occurs in valid and reports whether
// it did so.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) < 0 {
		l.backup()
		return false
	}
	return true
}

// acceptAll consumes a run of runes that all occur in valid.
func (l *lexer) acceptAll(valid string) {
	for {
		if strings.IndexRune(valid, l.next()) < 0 {
			break
		}
	}
	// Put back the rune that ended the run (a no-op at end of input).
	l.backup()
}

// acceptPassing consumes the next rune if valid returns true for it and
// reports whether it did so.
func (l *lexer) acceptPassing(valid func(rune) bool) bool {
	if !valid(l.next()) {
		l.backup()
		return false
	}
	return true
}

// acceptAllPassing consumes a run of runes for which valid returns true.
// The run always stops at the end of the input, even if valid would accept
// eof; next does not advance at eof, so continuing would never terminate.
func (l *lexer) acceptAllPassing(valid func(rune) bool) {
	for {
		r := l.next()
		if r == eof || !valid(r) {
			break
		}
	}
	// Put back the rune that ended the run (a no-op at end of input).
	l.backup()
}

// TokenType identifies the kind of a Token.
type TokenType int

const (
	// TokenErr is a lexing error; the token value holds the message.
	TokenErr TokenType = iota
	// TokenEOF marks the end of file.
	TokenEOF
	// TokenLBrace is {.
	TokenLBrace
	// TokenRBrace is }.
	TokenRBrace
	// TokenCommand is a command character.
	TokenCommand
	// TokenSubstituteDelimiter is usually / but could be something else.
	TokenSubstituteDelimiter
	// TokenSubex is a subex.
	TokenSubex
	// TokenLabel is a label.
	TokenLabel
)

// Token is a single lexical item: its type and the text it was scanned from
// (or, for TokenErr, the error message).
type Token struct {
	typ TokenType
	val string
}

// String renders the token for display: "EOF" for TokenEOF, the raw message
// for TokenErr, and otherwise the quoted value, cut to its first 10
// characters and followed by "..." when the value is longer than 10 bytes.
func (t Token) String() string {
	if t.typ == TokenEOF {
		return "EOF"
	}
	if t.typ == TokenErr {
		return t.val
	}
	if len(t.val) <= 10 {
		return fmt.Sprintf("%q", t.val)
	}
	return fmt.Sprintf("%.10q...", t.val)
}

// Lex starts lexing input in a new goroutine and returns the channel on
// which the tokens are delivered. The channel is unbuffered and is closed
// when lexing stops, so ranging over it to completion lets the goroutine
// finish.
func Lex(input string) chan Token {
	tokens := make(chan Token)
	l := &lexer{input: input, tokenStream: tokens}
	go l.run()
	return tokens
}

const (
	whitespace         string = " \t"
	whitespaceNewlines string = " \t\r\n"
)

// isAlpha reports whether r is an ASCII letter, a-z or A-Z inclusive.
func isAlpha(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	}
	return false
}

// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

// isAlphaNumeric reports whether r is an ASCII letter or decimal digit.
func isAlphaNumeric(r rune) bool {
	return isAlpha(r) || isDigit(r)
}

// isStringIndexChar reports whether r is an ASCII letter, a decimal digit,
// an underscore or a hyphen.
func isStringIndexChar(r rune) bool {
	return r == '_' || r == '-' || isAlphaNumeric(r)
}

// lexCommand skips leading spaces and tabs, then scans one command
// character, brace or the end of input and chooses the state that follows.
func lexCommand(l *lexer) stateFunc {
	l.acceptAll(whitespace)
	l.ignore()
	switch r := l.next(); {
	case r == eof:
		l.emit(TokenEOF)
		return nil
	case r == '{':
		l.emit(TokenLBrace)
		return lexCommand
	case r == '}':
		l.emit(TokenRBrace)
		return lexCommand
	case r == 's' || r == 'S':
		// Substitution commands are followed by a delimited subex.
		l.emit(TokenCommand)
		return lexSubstitution
	case r == ':' || r == 'b':
		// These commands are followed by a label.
		l.emit(TokenCommand)
		return lexLabel
	case isAlpha(r):
		l.emit(TokenCommand)
		return lexCommand
	}
	return l.errorf("Expected command found something else")
}

// lexSubstitution scans the argument of a substitution command: an opening
// delimiter rune, the subex text up to the next occurrence of that rune,
// and the closing delimiter. Missing pieces produce a TokenErr.
func lexSubstitution(l *lexer) stateFunc {
	delimiter := l.next()
	if delimiter == eof {
		return l.errorf("Missing subex in substitution command")
	}
	l.emit(TokenSubstituteDelimiter)
	for {
		switch l.next() {
		case delimiter:
			// Step back so the subex token excludes the closing delimiter,
			// then consume the delimiter as its own token.
			l.backup()
			l.emit(TokenSubex)
			l.next()
			l.emit(TokenSubstituteDelimiter)
			return lexCommand
		case eof:
			return l.errorf("Missing closing substitution delimiter")
		}
	}
}

// lexLabel consumes a single rune and emits it as a TokenLabel. At the end
// of the input nothing is consumed and the emitted label is empty.
func lexLabel(l *lexer) stateFunc {
	l.next()
	l.emit(TokenLabel)
	return lexCommand
}