From 8cf10efe3b5a1bcc70bc6e5590ee63fd5eb00c5b Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Wed, 19 Jul 2023 11:57:59 +0100 Subject: Huge refactor to a more value based system, doing away with terminals. Also introduces unit testing --- subex/parse.go | 295 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 157 insertions(+), 138 deletions(-) (limited to 'subex/parse.go') diff --git a/subex/parse.go b/subex/parse.go index 746217b..a671e6d 100644 --- a/subex/parse.go +++ b/subex/parse.go @@ -22,7 +22,7 @@ func accept(l RuneReader, chars string) bool { return false } -func expectBracket(l RuneReader, ifLeft walk.Atom, ifRight walk.Atom) walk.Atom { +func expectBracket(l RuneReader, ifLeft walk.AtomOLD, ifRight walk.AtomOLD) walk.AtomOLD { switch l.Next() { case '(': return ifLeft @@ -38,7 +38,7 @@ func isNumericRune(r rune) bool { } // Having just parsed a `, read until the next ` and parse the contents into a list of non-string atoms -func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { +func parseNonStringLiteral(l RuneReader) (literals []walk.Scalar) { for { r := l.Next() if isNumericRune(r) { @@ -57,7 +57,7 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { if err != nil { panic("Invalid number literal") } - literals = append(literals, walk.NewAtomNumber(number)) + literals = append(literals, walk.NumberScalar(number)) continue } switch r { @@ -67,30 +67,22 @@ func parseNonStringLiteral(l RuneReader) (literals []walk.Atom) { continue case 'n': if accept(l, "u") && accept(l, "l") && accept(l, "l") { - literals = append(literals, walk.NewAtomNull()) + literals = append(literals, walk.NullScalar{}) } else { panic("Invalid literal") } case 't': if accept(l, "r") && accept(l, "u") && accept(l, "e") { - literals = append(literals, walk.NewAtomBool(true)) + literals = append(literals, walk.BoolScalar(true)) } else { panic("Invalid literal") } case 'f': if accept(l, "a") && accept(l, "l") && accept(l, "s") && accept(l, "e") { - literals = append(literals, walk.NewAtomBool(false)) + literals = append(literals, walk.BoolScalar(false)) } else { panic("Invalid literal") } - case '{': - literals = append(literals, walk.NewAtomTerminal(walk.MapBegin)) - case '}': - literals = append(literals, walk.NewAtomTerminal(walk.MapEnd)) - case '[': - literals = append(literals, walk.NewAtomTerminal(walk.ArrayBegin)) - case ']': - literals = append(literals, walk.NewAtomTerminal(walk.ArrayEnd)) default: panic("Invalid literal") } @@ -177,113 +169,113 @@ func parseReplacement(l RuneReader) (output []OutputContentAST) { case '`': literals := parseNonStringLiteral(l) for _, literal := range literals { - output = append(output, OutputAtomLiteralAST {literal}) + output = append(output, OutputValueLiteralAST {literal}) } - case '"': - output = append(output, OutputAtomLiteralAST {walk.NewAtomStringTerminal()}) default: - output = append(output, OutputAtomLiteralAST{atom: walk.NewAtomStringRune(r)}) + panic("Invalid value to insert") + //output = append(output, OutputValueLiteralAST{atom: walk.NewAtomStringRune(r)}) } } return output } // Parse the contents of a range subex [] into a map -func parseRangeSubex(l RuneReader) map[walk.Atom]walk.Atom { - // TODO escaping - parts := make(map[walk.Atom]walk.Atom) - var froms []walk.Atom - var hasTo bool - for { - fromsStart := l.Next() - if fromsStart == ']' { - hasTo = false - break - } else if fromsStart == '=' { - hasTo = true - break - } else if fromsStart == '`' { - literals := parseNonStringLiteral(l) - froms = append(froms, literals...) - continue - } else if fromsStart == '"' { - froms = append(froms, walk.NewAtomStringTerminal()) - continue - } - if accept(l, "-") { - fromsEnd := l.Next() - if fromsEnd == ']' || fromsEnd == '=' { - l.Rewind() - fromsEnd = fromsStart - } - for i := fromsStart; i <= fromsEnd; i += 1 { - froms = append(froms, walk.NewAtomStringRune(i)) - } - } else { - froms = append(froms, walk.NewAtomStringRune(fromsStart)) - } - } - if len(froms) == 0 { - panic("Missing from part of range expression") - } +// func parseRangeSubex(l RuneReader) map[walk.AtomOLD]walk.AtomOLD { +// // TODO escaping +// parts := make(map[walk.AtomOLD]walk.AtomOLD) +// var froms []walk.AtomOLD +// var hasTo bool +// for { +// fromsStart := l.Next() +// if fromsStart == ']' { +// hasTo = false +// break +// } else if fromsStart == '=' { +// hasTo = true +// break +// } else if fromsStart == '`' { +// literals := parseNonStringLiteral(l) +// froms = append(froms, literals...) +// continue +// } else if fromsStart == '"' { +// froms = append(froms, walk.NewAtomStringTerminal()) +// continue +// } +// if accept(l, "-") { +// fromsEnd := l.Next() +// if fromsEnd == ']' || fromsEnd == '=' { +// l.Rewind() +// fromsEnd = fromsStart +// } +// for i := fromsStart; i <= fromsEnd; i += 1 { +// froms = append(froms, walk.NewAtomStringRune(i)) +// } +// } else { +// froms = append(froms, walk.NewAtomStringRune(fromsStart)) +// } +// } +// if len(froms) == 0 { +// panic("Missing from part of range expression") +// } - var tos []walk.Atom - if hasTo { - for { - tosStart := l.Next() - if tosStart == ']' { - break - } else if tosStart == '`' { - literals := parseNonStringLiteral(l) - tos = append(tos, literals...) - continue - } else if tosStart == '"' { - tos = append(tos, walk.NewAtomStringTerminal()) - continue - } - if accept(l, "-") { - tosEnd := l.Next() - if tosEnd == ']' { - l.Rewind() - tosEnd = tosStart - } - for i := tosStart; i <= tosEnd; i += 1 { - tos = append(tos, walk.NewAtomStringRune(i)) - } - } else { - tos = append(tos, walk.NewAtomStringRune(tosStart)) - } - } - } else { - tos = froms - } - if len(tos) == 0 { - panic("Missing to part of range expression") - } +// var tos []walk.AtomOLD +// if hasTo { +// for { +// tosStart := l.Next() +// if tosStart == ']' { +// break +// } else if tosStart == '`' { +// literals := parseNonStringLiteral(l) +// tos = append(tos, literals...) +// continue +// } else if tosStart == '"' { +// tos = append(tos, walk.NewAtomStringTerminal()) +// continue +// } +// if accept(l, "-") { +// tosEnd := l.Next() +// if tosEnd == ']' { +// l.Rewind() +// tosEnd = tosStart +// } +// for i := tosStart; i <= tosEnd; i += 1 { +// tos = append(tos, walk.NewAtomStringRune(i)) +// } +// } else { +// tos = append(tos, walk.NewAtomStringRune(tosStart)) +// } +// } +// } else { +// tos = froms +// } +// if len(tos) == 0 { +// panic("Missing to part of range expression") +// } - for i, from := range froms { - parts[from] = tos[i % len(tos)] - } - return parts -} +// for i, from := range froms { +// parts[from] = tos[i % len(tos)] +// } +// return parts +// } -func parseSubex(l RuneReader, minPower int) SubexAST { +func parseSubex(l RuneReader, minPower int, runic bool) SubexAST { var lhs SubexAST r := l.Next() switch r { case eof: return nil case '(': - lhs = parseSubex(l, 0) + lhs = parseSubex(l, 0, runic) if !accept(l, ")") { panic("Missing matching )") } - case '[': - rangeParts := parseRangeSubex(l) - lhs = SubexASTRange {rangeParts} - case ')', '|', ';', '{', '+', '-', '*', '/', '!', '$', ':': + // TODO + // case '[': + // rangeParts := parseRangeSubex(l) + // lhs = SubexASTRange {rangeParts} + case ')', ']', '"', '|', ';', '{', '+', '-', '*', '/', '!', '$': l.Rewind() - return nil + return SubexASTEmpty{} case '=': replacement := parseReplacement(l) lhs = SubexASTOutput{replacement} @@ -291,47 +283,80 @@ func parseSubex(l RuneReader, minPower int) SubexAST { literals := parseNonStringLiteral(l) lhs = SubexASTEmpty{} for _, literal := range literals { - lhs = SubexASTConcat {lhs, SubexASTCopyAtom {literal}} + lhs = SubexASTConcat {lhs, SubexASTCopyScalar {literal}} } - case '^': - replacement := parseReplacement(l) - replacement = append( - []OutputContentAST{OutputAtomLiteralAST {walk.NewAtomStringTerminal()}}, - replacement... - ) - replacement = append( - replacement, - OutputAtomLiteralAST {walk.NewAtomStringTerminal()}, - ) - lhs = SubexASTOutput {replacement} + // case '^': + // replacement := parseReplacement(l) + // replacement = append( + // []OutputContentAST{OutputValueLiteralAST {walk.NewAtomStringTerminal()}}, + // replacement... + // ) + // replacement = append( + // replacement, + // OutputValueLiteralAST {walk.NewAtomStringTerminal()}, + // ) + // lhs = SubexASTOutput {replacement} case '.': - lhs = SubexASTCopyAny{} + if runic { + lhs = SubexASTCopyAnyRune{} + } else { + lhs = SubexASTCopyAnyValue{} + } case '?': lhs = SubexASTCopyBool{} case '%': lhs = SubexASTCopyNumber{} - case '_': - lhs = SubexASTCopyStringAtom{} - case '#': - lhs = SubexASTCopyString{} - case ',': - lhs = SubexASTCopyValue{} - case '"': - lhs = SubexASTCopyAtom {walk.NewAtomStringTerminal()} + case ':': + if runic { + lhs = SubexASTCopyRune {':'} + } else { + if !accept(l, "[") { + panic("Missing [ after :") + } + lhs = SubexASTEnterArray {parseSubex(l, 0, runic)} + if !accept(l, "]") { + panic("Missing matching ]") + } + } case '~': - literals := parseNonStringLiteral(l) - var replacement []OutputContentAST - for _, literal := range literals { - replacement = append(replacement, OutputAtomLiteralAST {literal}) + if runic { + lhs = SubexASTCopyRune {'~'} + } else { + if !accept(l, "\"") { + panic("Missing \" after ~") + } + lhs = SubexASTEnterString {parseSubex(l, 0, true)} + if !accept(l, "\"") { + panic("Missing matching \"") + } } - lhs = SubexASTOutput {replacement} + // TODO + // case '_': + // lhs = SubexASTCopyStringAtom{} + // case '#': + // lhs = SubexASTCopyString{} + // case ',': + // lhs = SubexASTCopyValue{} + // case '"': + // lhs = SubexASTCopyScalar {walk.NewAtomStringTerminal()} + // case '~': + // literals := parseNonStringLiteral(l) + // var replacement []OutputContentAST + // for _, literal := range literals { + // replacement = append(replacement, OutputValueLiteralAST {literal}) + // } + // lhs = SubexASTOutput {replacement} default: - lhs = SubexASTCopyAtom{Atom: walk.NewAtomStringRune(r)} + if runic { + lhs = SubexASTCopyRune {r} + } else { + panic("Tried to match rune outside of string") + } } loop: for { if minPower <= 20 { - next := parseSubex(l, 21) - if next != nil { + next := parseSubex(l, 21, runic) + if next != nil && (next != SubexASTEmpty{}) { lhs = SubexASTConcat{lhs, next} continue loop } @@ -366,20 +391,14 @@ func parseSubex(l RuneReader, minPower int) SubexAST { Slot: slot, } } - case r == ':' && minPower <= 4: - replacement := parseReplacement(l) - lhs = SubexASTConcat { - SubexASTDiscard {lhs}, - SubexASTOutput {replacement}, - } case r == '|' && minPower <= 8: - rhs := parseSubex(l, 9) + rhs := parseSubex(l, 9, runic) if rhs == nil { panic("Missing subex after |") } lhs = SubexASTOr{lhs, rhs} case r == ';' && minPower <= 10: - rhs := parseSubex(l, 11) + rhs := parseSubex(l, 11, runic) if rhs == nil { panic("Missing subex after ;") } @@ -396,7 +415,7 @@ func parseSubex(l RuneReader, minPower int) SubexAST { } func Parse(l RuneReader) SubexAST { - ast := parseSubex(l, 0) + ast := parseSubex(l, 0, false) if ast == nil { return SubexASTEmpty{} } -- cgit v1.2.3