stred-go

Stred: Streaming Tree Editor. Like sed, but for JSON. This is the Go implementation.
git clone https://shtanton.xyz/git/stred-go.git
Log | Files | Refs | README

parse.go (9519B)


      1 package subex
      2 
      3 import (
      4 	"main/walk"
      5 	"strconv"
      6 	"strings"
      7 )
      8 
// RuneReader is the rune source consumed by every parser in this file.
type RuneReader interface {
	// Next returns the next rune of the input. The parsers compare its
	// result against eof to detect the end of the input.
	Next() rune
	// Rewind is called by the parsers after a Next whose rune they want to
	// leave for a later read.
	// NOTE(review): presumably this steps back by exactly one rune — confirm
	// against the implementation.
	Rewind()
}
     13 
     14 func accept(l RuneReader, chars string) bool {
     15 	r := l.Next()
     16 	for _, char := range chars {
     17 		if char == r {
     18 			return true
     19 		}
     20 	}
     21 	l.Rewind()
     22 	return false
     23 }
     24 
     25 func isNumericRune(r rune) bool {
     26 	return '0' <= r && r <= '9' || r == '.'
     27 }
     28 
     29 // Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms
     30 func parseScalarLiteral(l RuneReader) (walk.Scalar, bool) {
     31 	r := l.Next()
     32 	if isNumericRune(r) {
     33 		var builder strings.Builder
     34 		builder.WriteRune(r)
     35 		for {
     36 			r := l.Next()
     37 			if !isNumericRune(r) {
     38 				l.Rewind()
     39 				break
     40 			}
     41 			builder.WriteRune(r)
     42 		}
     43 		numberString := builder.String()
     44 		number, err := strconv.ParseFloat(numberString, 64)
     45 		if err != nil {
     46 			panic("Invalid number literal")
     47 		}
     48 		return walk.NumberScalar(number), true
     49 	}
     50 	switch r {
     51 		case 'n':
     52 			if accept(l, "u") && accept(l, "l") && accept(l, "l") {
     53 				return walk.NullScalar{}, true
     54 			} else {
     55 				panic("Invalid literal")
     56 			}
     57 		case 't':
     58 			if accept(l, "r") && accept(l, "u") && accept(l, "e") {
     59 				return walk.BoolScalar(true), true
     60 			} else {
     61 				panic("Invalid literal")
     62 			}
     63 		case 'f':
     64 			if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") {
     65 				return walk.BoolScalar(false), true
     66 			} else {
     67 				panic("Invalid literal")
     68 			}
     69 		default:
     70 			panic("Invalid literal")
     71 	}
     72 }
     73 
     74 func charIsDigit(c rune) bool {
     75 	return '0' <= c && c <= '9'
     76 }
     77 
     78 // Parse a positive integer, reads digits 0-9 and stops at the first non-digit
     79 func parseInt(l RuneReader) (output int) {
     80 	for {
     81 		char := l.Next()
     82 		if charIsDigit(char) {
     83 			output = output * 10 + int(char - '0')
     84 		} else {
     85 			break
     86 		}
     87 	}
     88 	l.Rewind()
     89 	return output
     90 }
     91 
     92 // Having just read {, read in and parse the range contents
     93 func parseRepeatRange(l RuneReader) (output []ConvexRange) {
     94 	loop: for {
     95 		var start, end int
     96 		char := l.Next()
     97 		l.Rewind()
     98 		if char == '-' {
     99 			start = -1
    100 		} else {
    101 			start = parseInt(l)
    102 		}
    103 		switch l.Next() {
    104 			case ',':
    105 				output = append(output, ConvexRange{start, start})
    106 				continue loop
    107 			case '-':
    108 				char := l.Next()
    109 				if charIsDigit(char) {
    110 					l.Rewind()
    111 					end = parseInt(l)
    112 				} else {
    113 					l.Rewind()
    114 					end = -1
    115 				}
    116 			case '}':
    117 				output = append(output, ConvexRange{start, start})
    118 				break loop
    119 			default:
    120 				panic("Invalid character in repeat specifier")
    121 		}
    122 		switch l.Next() {
    123 			case ',':
    124 				output = append(output, ConvexRange{start, end})
    125 				continue loop
    126 			case '}':
    127 				output = append(output, ConvexRange{start, end})
    128 				break loop
    129 			default:
    130 				panic("Invalid character in repeat specifier")
    131 		}
    132 	}
    133 	return output
    134 }
    135 
    136 // TODO: Consider if it's worth making better use of the go type system to enforce output being all runes or all values
    137 func parseReplacement(l RuneReader, runic bool) (output []OutputContentAST) {
    138 	// TODO escaping
    139 	// TODO add arrays, maps and strings
    140 	loop: for {
    141 		r := l.Next()
    142 		switch r {
    143 			case eof:
    144 				panic("Missing closing `")
    145 			case '`':
    146 				break loop
    147 			case '$':
    148 				slot := l.Next()
    149 				if slot == eof {
    150 					panic("Missing slot character")
    151 				}
    152 				output = append(output, OutputLoadAST{slot: slot})
    153 			default:
    154 				if runic {
    155 					output = append(output, OutputRuneLiteralAST {walk.StringRuneAtom(r)})
    156 				} else {
    157 					l.Rewind()
    158 					scalar, ok := parseScalarLiteral(l)
    159 					if !ok {
    160 						panic("Invalid scalar literal")
    161 					}
    162 					output = append(output, OutputValueLiteralAST {scalar})
    163 				}
    164 		}
    165 	}
    166 	return output
    167 }
    168 
    169 // Parse the contents of a range subex [] into a map
    170 // func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD {
    171 // 	// TODO escaping
    172 // 	parts := make(map[walk.AtomOLD]walk.AtomOLD)
    173 // 	var froms []walk.AtomOLD
    174 // 	var hasTo bool
    175 // 	for {
    176 // 		fromsStart := l.Next()
    177 // 		if fromsStart == ']' {
    178 // 			hasTo = false
    179 // 			break
    180 // 		} else if fromsStart == '=' {
    181 // 			hasTo = true
    182 // 			break
    183 // 		} else if fromsStart == '`' {
    184 // 			literals := parseNonStringLiteral(l)
    185 // 			froms = append(froms, literals...)
    186 // 			continue
    187 // 		} else if fromsStart == '"' {
    188 // 			froms = append(froms, walk.NewAtomStringTerminal())
    189 // 			continue
    190 // 		}
    191 // 		if accept(l, "-") {
    192 // 			fromsEnd := l.Next()
    193 // 			if fromsEnd == ']' || fromsEnd == '=' {
    194 // 				l.Rewind()
    195 // 				fromsEnd = fromsStart
    196 // 			}
    197 // 			for i := fromsStart; i <= fromsEnd; i += 1 {
    198 // 				froms = append(froms, walk.NewAtomStringRune(i))
    199 // 			}
    200 // 		} else {
    201 // 			froms = append(froms, walk.NewAtomStringRune(fromsStart))
    202 // 		}
    203 // 	}
    204 // 	if len(froms) == 0 {
    205 // 		panic("Missing from part of range expression")
    206 // 	}
    207 
    208 // 	var tos []walk.AtomOLD
    209 // 	if hasTo {
    210 // 		for {
    211 // 			tosStart := l.Next()
    212 // 			if tosStart == ']' {
    213 // 				break
    214 // 			} else if tosStart == '`' {
    215 // 				literals := parseNonStringLiteral(l)
    216 // 				tos = append(tos, literals...)
    217 // 				continue
    218 // 			} else if tosStart == '"' {
    219 // 				tos = append(tos, walk.NewAtomStringTerminal())
    220 // 				continue
    221 // 			}
    222 // 			if accept(l, "-") {
    223 // 				tosEnd := l.Next()
    224 // 				if tosEnd == ']' {
    225 // 					l.Rewind()
    226 // 					tosEnd = tosStart
    227 // 				}
    228 // 				for i := tosStart; i <= tosEnd; i += 1 {
    229 // 					tos = append(tos, walk.NewAtomStringRune(i))
    230 // 				}
    231 // 			} else {
    232 // 				tos = append(tos, walk.NewAtomStringRune(tosStart))
    233 // 			}
    234 // 		}
    235 // 	} else {
    236 // 		tos = froms
    237 // 	}
    238 // 	if len(tos) == 0 {
    239 // 		panic("Missing to part of range expression")
    240 // 	}
    241 	
    242 // 	for i, from := range froms {
    243 // 		parts[from] = tos[i % len(tos)]
    244 // 	}
    245 // 	return parts
    246 // }
    247 
// parseSubex parses a subex from l using precedence climbing and returns its AST.
//
// minPower is the minimum binding power an operator needs in order to be
// consumed by this call; operators below it are left unread for an enclosing
// call. The thresholds used below are:
//   - concatenation (juxtaposition): minPower <= 20, right operand parsed at 21
//   - ; (join):                      minPower <= 10, right operand parsed at 11
//   - | (or):                        minPower <= 8,  right operand parsed at 9
//   - postfix { + * - / ! = $:       minPower <= 4
//
// runic selects rune-level parsing (true) or value-level parsing (false); it
// decides which AST node '.', ':', '~' and unrecognised runes produce.
//
// The result is nil when the very first rune read is eof, and SubexASTEmpty{}
// when the first rune is a closing delimiter or an operator (that rune is left
// unread). Malformed input panics.
func parseSubex(l RuneReader, minPower int, runic bool) SubexAST {
	var lhs SubexAST
	// First parse a single primary expression into lhs.
	r := l.Next()
	switch r {
		case eof:
			return nil
		case '(':
			// Parenthesised group: binding power restarts at 0 inside.
			lhs = parseSubex(l, 0, runic)
			if !accept(l, ")") {
				panic("Missing matching )")
			}
		// TODO
		// case '[':
		// 	rangeParts := parseRangeSubex(l)
		// 	lhs = SubexASTRange {rangeParts}
		case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '=', '$':
			// Not the start of a primary: hand the rune back to the caller.
			l.Rewind()
			return SubexASTEmpty{}
		// case '=':
		// 	replacement := parseReplacement(l)
		// 	lhs = SubexASTOutput{replacement}
		// case '^':
		// 	replacement := parseReplacement(l)
		// 	replacement = append(
		// 		[]OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}},
		// 		replacement...
		// 	)
		// 	replacement = append(
		// 		replacement,
		// 		OutputValueLiteralAST {walk.NewAtomStringTerminal()},
		// 	)
		// 	lhs = SubexASTOutput {replacement}
		case '.':
			if runic {
				lhs = SubexASTCopyAnyRune{}
			} else {
				lhs = SubexASTCopyAnyValue{}
			}
		case '?':
			lhs = SubexASTCopyBool{}
		case '%':
			lhs = SubexASTCopyNumber{}
		case ':':
			if runic {
				// In rune mode ':' is just a literal rune.
				lhs = SubexASTCopyRune {':'}
			} else {
				// In value mode ':' must introduce a bracketed array subex.
				if !accept(l, "[") {
					panic("Missing [ after :")
				}
				lhs = SubexASTEnterArray {parseSubex(l, 0, runic)}
				if !accept(l, "]") {
					panic("Missing matching ]")
				}
			}
		case '`':
			lhs = SubexASTOutput {parseReplacement(l, runic)}
		case '~':
			if runic {
				// In rune mode '~' is just a literal rune.
				lhs = SubexASTCopyRune {'~'}
			} else {
				// In value mode '~' must introduce a quoted string subex,
				// whose contents are parsed in rune mode.
				if !accept(l, "\"") {
					panic("Missing \" after ~")
				}
				lhs = SubexASTEnterString {parseSubex(l, 0, true)}
				if !accept(l, "\"") {
					panic("Missing matching \"")
				}
			}
		// TODO
		// case '_':
		// 	lhs = SubexASTCopyStringAtom{}
		// case '#':
		// 	lhs = SubexASTCopyString{}
		// case ',':
		// 	lhs = SubexASTCopyValue{}
		// case '"':
		// 	lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()}
		// case '~':
		// 	literals := parseNonStringLiteral(l)
		// 	var replacement []OutputContentAST
		// 	for _, literal := range literals {
		// 		replacement = append(replacement, OutputValueLiteralAST {literal})
		// 	}
		// 	lhs = SubexASTOutput {replacement}
		default:
			if runic {
				lhs = SubexASTCopyRune {r}
			} else {
				// Value mode: r starts a scalar literal, so un-read it and
				// let parseScalarLiteral consume the whole literal.
				l.Rewind()
				scalar, ok := parseScalarLiteral(l)
				if !ok {
					panic("Invalid subex")
				}
				lhs = SubexASTCopyScalar {scalar}
			}
	}
	// Then repeatedly extend lhs with whatever follows, for as long as the
	// next operator is permitted by minPower.
	loop: for {
		if minPower <= 20 {
			// Concatenation has no operator rune: try to parse another
			// primary directly after lhs. A nil or empty result means nothing
			// concatenable follows, so fall through to the operator check.
			next := parseSubex(l, 21, runic)
			if next != nil && (next != SubexASTEmpty{}) {
				lhs = SubexASTConcat{lhs, next}
				continue loop
			}
		}
		r := l.Next()
		switch {
			// Postfix operators wrap everything parsed into lhs so far.
			case r == '{' && minPower <= 4:
				lhs = SubexASTRepeat {
					Content: lhs,
					Acceptable: parseRepeatRange(l),
				}
			case r == '+' && minPower <= 4:
				lhs = SubexASTSum {lhs}
			case r == '*' && minPower <= 4:
				lhs = SubexASTProduct {lhs}
			case r == '-' && minPower <= 4:
				lhs = SubexASTNegate {lhs}
			case r == '/' && minPower <= 4:
				lhs = SubexASTReciprocal {lhs}
			case r == '!' && minPower <= 4:
				lhs = SubexASTNot {lhs}
			case r == '=' && minPower <= 4:
				lhs = SubexASTEqual {lhs}
			case r == '$' && minPower <= 4:
				// $ is followed by a slot rune; the slot '_' discards the
				// match instead of storing it.
				slot := l.Next()
				if slot == eof {
					panic("Missing slot character")
				}
				if slot == '_' {
					lhs = SubexASTDiscard {lhs}
				} else {
					lhs = SubexASTStore{
						Match: lhs,
						Slot: slot,
					}
				}
			case r == '|' && minPower <= 8:
				rhs := parseSubex(l, 9, runic)
				if rhs == nil {
					panic("Missing subex after |")
				}
				lhs = SubexASTOr{lhs, rhs}
			case r == ';' && minPower <= 10:
				rhs := parseSubex(l, 11, runic)
				if rhs == nil {
					panic("Missing subex after ;")
				}
				lhs = SubexASTJoin{
					Content: lhs,
					Delimiter: rhs,
				}
			default:
				// Either not an operator, or one that binds too loosely for
				// this call: leave it for the caller.
				l.Rewind()
				break loop
		}
	}
	return lhs
}
    406 
    407 func Parse(l RuneReader) SubexAST {
    408 	ast := parseSubex(l, 0, false)
    409 	if ast == nil {
    410 		return SubexASTEmpty{}
    411 	}
    412 	return ast
    413 }