stred-go

Stred: Streaming Tree Editor. Like sed, but for JSON. This is the Go implementation.
git clone https://shtanton.xyz/git/stred-go.git
Log | Files | Refs | README

lex.go (3791B)


      1 package main
      2 
      3 import (
      4 	"fmt"
      5 	"strings"
      6 	"unicode/utf8"
      7 )
      8 
// stateFunc is one state of the lexer's state machine. It performs some
// lexing on the given lexer and returns the state to run next; lexer.run
// stops as soon as a state returns nil.
type stateFunc func(*lexer) stateFunc
     10 
// lexer holds the scanning state for a single input string.
type lexer struct {
	// input is the complete text being lexed.
	input string
	// start is the byte offset in input where the pending token begins.
	start int
	// pos is the byte offset in input of the next rune to be read.
	pos int
	// width is the byte length of the rune most recently returned by next
	// (0 when next hit the end of input); backup subtracts it from pos.
	width int
	// tokenStream receives every token produced; run closes it when lexing ends.
	tokenStream chan Token
}
     18 
     19 func (l *lexer) run() {
     20 	for state := lexCommand; state != nil; {
     21 		state = state(l)
     22 	}
     23 	close(l.tokenStream)
     24 }
     25 
     26 func (l *lexer) emit(t TokenType) {
     27 	l.tokenStream <- Token{
     28 		typ: t,
     29 		val: l.input[l.start:l.pos],
     30 	}
     31 	l.start = l.pos
     32 }
     33 
     34 func (l *lexer) errorf(format string, args ...interface{}) stateFunc {
     35 	l.tokenStream <- Token{
     36 		typ: TokenErr,
     37 		val: fmt.Sprintf(format, args...),
     38 	}
     39 	return nil
     40 }
     41 
// eof is the sentinel rune that next returns once the input is exhausted.
const eof rune = -1
     43 
     44 func (l *lexer) next() rune {
     45 	if l.pos >= len(l.input) {
     46 		l.width = 0
     47 		return eof
     48 	}
     49 	var r rune
     50 	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
     51 	l.pos += l.width
     52 	return r
     53 }
     54 
     55 func (l *lexer) backup() {
     56 	l.pos -= l.width
     57 }
     58 
// ignore discards the text read so far for the pending token by moving
// start up to pos; nothing is emitted for the skipped text.
func (l *lexer) ignore() {
	l.start = l.pos
}
     62 
// reset rewinds pos to start, un-reading everything consumed since the last
// emit or ignore. width is left untouched.
func (l *lexer) reset() {
	l.pos = l.start
}
     66 
     67 func (l *lexer) expect(valid string) bool {
     68 	for _, r := range valid {
     69 		if l.next() != r {
     70 			l.backup()
     71 			return false
     72 		}
     73 	}
     74 	return true
     75 }
     76 
     77 func (l *lexer) peek() rune {
     78 	w := l.width
     79 	r := l.next()
     80 	l.backup()
     81 	l.width = w
     82 	return r
     83 }
     84 
     85 func (l *lexer) accept(valid string) bool {
     86 	if strings.IndexRune(valid, l.next()) >= 0 {
     87 		return true
     88 	}
     89 	l.backup()
     90 	return false
     91 }
     92 
     93 func (l *lexer) acceptAll(valid string) {
     94 	for strings.IndexRune(valid, l.next()) >= 0 {}
     95 	l.backup()
     96 }
     97 
     98 func (l *lexer) acceptPassing(valid func(rune) bool) bool {
     99 	if valid(l.next()) {
    100 		return true
    101 	}
    102 	l.backup()
    103 	return false
    104 }
    105 
    106 func (l *lexer) acceptAllPassing(valid func(rune) bool) {
    107 	for valid(l.next()) {}
    108 	l.backup()
    109 }
    110 
// TokenType identifies the kind of a Token.
type TokenType int

// The token kinds produced by the lexer. TokenErr is deliberately first so
// that it is the zero value of TokenType.
const (
	TokenErr TokenType = iota // Lexing error; the token's val is the error message
	TokenEOF // End of input
	TokenLBrace // {
	TokenRBrace // }
	TokenCommand // A single command character
	TokenSubstituteDelimiter // The rune following s or S; usually / but could be something else
	TokenSubex // The subex text between two substitute delimiters
	TokenLabel // A label: the single rune following a : or b command
)
    123 
// Token is one lexical unit produced by the lexer.
type Token struct {
	// typ is the kind of token.
	typ TokenType
	// val is the input text the token covers; for TokenErr it is the error
	// message instead.
	val string
}
    128 
    129 func (t Token) String() string {
    130 	switch t.typ {
    131 	case TokenEOF:
    132 		return "EOF"
    133 	case TokenErr:
    134 		return t.val
    135 	}
    136 	if len(t.val) > 10 {
    137 		return fmt.Sprintf("%.10q...", t.val)
    138 	}
    139 	return fmt.Sprintf("%q", t.val)
    140 }
    141 
    142 func Lex(input string) chan Token {
    143 	l := &lexer{
    144 		input: input,
    145 		tokenStream: make(chan Token),
    146 	}
    147 	go l.run()
    148 	return l.tokenStream
    149 }
    150 
// Rune sets for use with accept and acceptAll.
const (
	// whitespace is the horizontal whitespace (space and tab) that
	// lexCommand skips before each command.
	whitespace string = " \t"
	// whitespaceNewlines is whitespace plus carriage return and line feed.
	whitespaceNewlines string = " \t\r\n"
)
    155 
    156 func isAlpha(r rune) bool {
    157 	return ('a' <= r && r < 'z') || ('A' <= r && r <= 'Z')
    158 }
    159 func isDigit(r rune) bool {
    160 	return '0' <= r && r <= '9'
    161 }
    162 func isAlphaNumeric(r rune) bool {
    163 	return isAlpha(r) || isDigit(r)
    164 }
    165 func isStringIndexChar(r rune) bool {
    166 	return isAlphaNumeric(r) || r == '_' || r == '-'
    167 }
    168 
    169 func lexCommand(l *lexer) stateFunc {
    170 	l.acceptAll(whitespace)
    171 	l.ignore()
    172 	r := l.next()
    173 	switch r {
    174 		case eof:
    175 			l.emit(TokenEOF)
    176 			return nil
    177 		case '{':
    178 			l.emit(TokenLBrace)
    179 			return lexCommand
    180 		case '}':
    181 			l.emit(TokenRBrace)
    182 			return lexCommand
    183 		case 's', 'S':
    184 			l.emit(TokenCommand)
    185 			return lexSubstitution
    186 		case ':', 'b':
    187 			l.emit(TokenCommand)
    188 			return lexLabel
    189 	}
    190 	if isAlpha(r) {
    191 		l.emit(TokenCommand)
    192 		return lexCommand
    193 	}
    194 	return l.errorf("Expected command found something else")
    195 }
    196 
    197 func lexSubstitution(l *lexer) stateFunc {
    198 	delimiter := l.next()
    199 	if delimiter == eof {
    200 		return l.errorf("Missing subex in substitution command")
    201 	}
    202 	l.emit(TokenSubstituteDelimiter)
    203 	loop: for {
    204 		r := l.next()
    205 		switch r {
    206 			case delimiter:
    207 				l.backup()
    208 				l.emit(TokenSubex)
    209 				l.next()
    210 				l.emit(TokenSubstituteDelimiter)
    211 				break loop
    212 			case eof:
    213 				return l.errorf("Missing closing substitution delimiter")
    214 			default:
    215 		}
    216 	}
    217 	return lexCommand
    218 }
    219 
// lexLabel lexes the label that follows a ':' or 'b' command: it consumes
// exactly one rune, emits it as TokenLabel and continues in lexCommand.
//
// NOTE(review): at the end of the input next consumes nothing, so an empty
// TokenLabel is emitted rather than an error - confirm that the consumer of
// the token stream expects this.
func lexLabel(l *lexer) stateFunc {
	// The single rune read here is the entire label.
	l.next()
	l.emit(TokenLabel)
	return lexCommand
}
    224 }