From 8e80185508a697ddfcfed4a04d3f4e1ac5a330a9 Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Mon, 24 Apr 2023 13:16:06 +0100 Subject: WalkItems are now made of Atoms instead of WalkValues, and I have rolled my own JSON parser and serialiser These changes improve performance --- main/command.go | 29 ++- main/main.go | 58 +++--- walk/walk.go | 625 ++++++++++++++++++++++++++++++++++---------------------- 3 files changed, 417 insertions(+), 295 deletions(-) diff --git a/main/command.go b/main/command.go index a0ac35e..c7b1aa9 100644 --- a/main/command.go +++ b/main/command.go @@ -16,16 +16,7 @@ func (cmd PrintValueCommand) exec(state *ProgramState) { panic("Tried to convert invalid atoms to values") } path := walk.PathFromWalkValues(pathValues) - values, err := walk.Compound(state.value) - if err != nil { - panic("Tried to convert invalid atoms to values") - } - for _, value := range values { - state.out <- walk.WalkItem { - Value: value, - Path: path, - } - } + state.out.Print(path, state.value) } type SequenceCommand struct { @@ -39,16 +30,22 @@ func (cmd SequenceCommand) exec(state *ProgramState) { type NextCommand struct {} func (cmd NextCommand) exec(state *ProgramState) { - nextItem := <- state.in - state.value = walk.Atomise([]walk.WalkValue{nextItem.Value}) - state.path = walk.Atomise(nextItem.Path.ToWalkValues()) + nextItem, err := state.in.Read() + if err != nil { + panic("Missing next value") + } + state.value = nextItem.Value + state.path = nextItem.Path } type AppendNextCommand struct {} func (cmd AppendNextCommand) exec(state *ProgramState) { - nextItem := <- state.in - state.value = append(state.value, walk.Atomise([]walk.WalkValue{nextItem.Value})...) - state.path = walk.Atomise(nextItem.Path.ToWalkValues()) + nextItem, err := state.in.Read() + if err != nil { + panic("Missing next value") + } + state.value = append(state.value, nextItem.Value...) + state.path = nextItem.Path } type DeleteValueCommand struct {} diff --git a/main/main.go b/main/main.go index 564d14a..e6730de 100644 --- a/main/main.go +++ b/main/main.go @@ -10,8 +10,8 @@ type Program []Command type ProgramState struct { path, value, xreg []walk.Atom - in chan walk.WalkItem - out chan walk.WalkItem + in walk.JSONIn + out walk.JSONOut program []Command } @@ -40,41 +40,33 @@ func main() { program := Parse(tokens) stdin := bufio.NewReader(os.Stdin) - dataStream := walk.Json(stdin) state := ProgramState { - in: dataStream, - out: make(chan walk.WalkItem), + in: walk.NewJSONIn(stdin), + out: walk.NewJSONOut(), program: program, } - - go func () { - for walkItem := range dataStream { - state.value = walk.Atomise([]walk.WalkValue{walkItem.Value}) - state.path = walk.Atomise(walkItem.Path.ToWalkValues()) - for _, cmd := range state.program { - cmd.exec(&state) - } - if !quiet { - pathValues, err := walk.Compound(state.path) - if err != nil { - panic("Tried to convert invalid atoms to values") - } - path := walk.PathFromWalkValues(pathValues) - values, err := walk.Compound(state.value) - if err != nil { - panic("Tried to convert invalid atoms to values") - } - for _, value := range values { - state.out <- walk.WalkItem { - Value: value, - Path: path, - } - } + + for { + walkItem, err := state.in.Read() + if err != nil { + break + } + state.value = walkItem.Value + state.path = walkItem.Path + for _, cmd := range state.program { + cmd.exec(&state) + } + if !quiet { + pathValues, err := walk.Compound(state.path) + if err != nil { + panic("Tried to convert invalid atoms to values") } + path := walk.PathFromWalkValues(pathValues) + state.out.Print(path, state.value) } - close(state.out) - }() - - walk.JsonOut(state.out) + } + + state.in.AssertDone() + state.out.AssertDone() } \ No newline at end of file diff --git a/walk/walk.go b/walk/walk.go index 949b6a2..008713b 100644 --- a/walk/walk.go +++ b/walk/walk.go @@ -1,16 +1,19 @@ package walk import ( - "io" - "encoding/json" "fmt" "strings" "math" "unicode/utf8" + "bufio" + "strconv" ) // int or string type PathSegment interface {} +func stringPathSegment(segment PathSegment) string { + return fmt.Sprintf("%v", segment) +} type Path []PathSegment func (path Path) ToWalkValues() []WalkValue { var values []WalkValue @@ -205,295 +208,425 @@ type WalkValue interface { } type WalkItem struct { - Value WalkValue - Path Path -} - -type WalkItemStream struct { - channel chan WalkItem - rewinds []WalkItem + Value []Atom + Path []Atom } -func (stream *WalkItemStream) next() (WalkItem, bool) { - if len(stream.rewinds) == 0 { - item, hasItem := <- stream.channel - return item, hasItem - } - item := stream.rewinds[len(stream.rewinds)-1] - stream.rewinds = stream.rewinds[0:len(stream.rewinds)-1] - return item, true -} +type JSONInStructure int +const ( + JSONInRoot JSONInStructure = iota + JSONInMap + JSONInArray + JSONInValueEnd +) -func (stream *WalkItemStream) rewind(item WalkItem) { - stream.rewinds = append(stream.rewinds, item) +type JSONIn struct { + path []Atom + reader *bufio.Reader + structure []JSONInStructure } -func (stream *WalkItemStream) peek() (WalkItem, bool) { - item, hasItem := stream.next() - if !hasItem { - return item, false +func NewJSONIn(reader *bufio.Reader) JSONIn { + return JSONIn { + path: nil, + reader: reader, + structure: []JSONInStructure{JSONInRoot}, } - stream.rewind(item) - return item, true } -func tokenToValue(token json.Token) WalkValue { - switch token.(type) { - case nil: - return ValueNull {} - case bool: - return ValueBool(token.(bool)) - case float64: - return ValueNumber(token.(float64)) - case string: - return ValueString(token.(string)) - default: - panic("Can't convert JSON token to value") - } -} - -func readValue(dec *json.Decoder, path Path, out chan WalkItem) bool { - if !dec.More() { - return true - } - t, err := dec.Token() - if err == io.EOF { - return true - } else if err != nil { - panic("Invalid JSON") - } - switch t.(type) { - case nil, string, float64, bool: - v := tokenToValue(t) - out <- WalkItem {v, path} - return false - case json.Delim: - switch rune(t.(json.Delim)) { - case '[': - out <- WalkItem {ArrayBegin, path} - index := 0 - for dec.More() { - empty := readValue(dec, append(path, index), out) - if empty { - break - } - index += 1 - } - t, err := dec.Token() - if err != nil { - panic("Invalid JSON") - } - delim, isDelim := t.(json.Delim) - if !isDelim || delim != ']' { - panic("Expected ] in JSON") - } - out <- WalkItem{ArrayEnd, path} - return false - case '{': - out <- WalkItem {MapBegin, path} - for dec.More() { - t, _ := dec.Token() - key, keyIsString := t.(string) - if !keyIsString { - panic("Invalid JSON") - } - empty := readValue(dec, append(path, key), out) - if empty { - panic("Invalid JSON") - } - } - t, err := dec.Token() - if err != nil { - panic("Invalid JSON") - } - delim, isDelim := t.(json.Delim) - if !isDelim || delim != '}' { - panic("Expected } in JSON") - } - out <- WalkItem {MapEnd, path} - return false - default: - panic("Error parsing JSON") - } - default: - panic("Invalid JSON token") - } -} - -func startWalk(dec *json.Decoder, out chan WalkItem) { - isEmpty := readValue(dec, nil, out) - if isEmpty { - panic("Missing JSON input") +func isWhitespace(r rune) bool { + for _, ws := range " \t\r\n" { + if r == ws { + return true + } } - close(out) + return false } -func Json(r io.Reader) chan WalkItem { - dec := json.NewDecoder(r) - out := make(chan WalkItem) - go startWalk(dec, out) - return out +func isNumberRune(r rune) bool { + return '0' <= r && r <= '9' || r == '.' } -func printIndent(indent int) { - for i := 0; i < indent; i += 1 { - fmt.Print("\t") +func (in *JSONIn) popPath() { + if len(in.path) == 0 { + panic("Tried to pop from empty path") } -} - -func jsonOutArray(in *WalkItemStream, indent int) { - fmt.Println("[") - token, hasToken := in.next() - if !hasToken { - panic("Missing ] in output JSON") - } - terminal, isTerminal := token.Value.(TerminalValue) - if isTerminal && terminal == ArrayEnd { - fmt.Print("\n") - printIndent(indent) - fmt.Print("]") + finalAtom := in.path[len(in.path) - 1] + if finalAtom.Typ != AtomStringTerminal { + in.path = in.path[:len(in.path) - 1] return } - in.rewind(token) + i := len(in.path) - 2 for { - valueToken := jsonOutValue(in, indent + 1, true) - if valueToken != nil { - panic("Missing value in output JSON array") - } - token, hasToken := in.next() - if !hasToken { - panic("Missing ] in output JSON") + if i < 0 { + panic("Missing string begin in path") } - terminal, isTerminal := token.Value.(TerminalValue) - if isTerminal && terminal == ArrayEnd { - fmt.Print("\n") - printIndent(indent) - fmt.Print("]") - return + if in.path[i].Typ == AtomStringTerminal { + break } - in.rewind(token) - fmt.Println(",") + i-- } + in.path = in.path[:i] } -func jsonOutMap(in *WalkItemStream, indent int) { - fmt.Println("{") - token, hasToken := in.next() - if !hasToken { - panic("Missing } in output JSON") - } - terminal, isTerminal := token.Value.(TerminalValue) - if isTerminal && terminal == MapEnd { - fmt.Print("\n") - printIndent(indent) - fmt.Print("}") - return - } - in.rewind(token) +func (in *JSONIn) nextNonWsRune() (rune, error) { for { - keyToken, hasKeyToken := in.peek() - if !hasKeyToken { - panic("Missing map element") - } - printIndent(indent + 1) - if len(keyToken.Path) == 0 { - panic("Map element missing key") - } - key := keyToken.Path[len(keyToken.Path)-1] - switch key.(type) { - case int: - fmt.Print(key.(int)) - case string: - fmt.Printf("%q", key.(string)) - default: - panic("Invalid path segment") + r, _, err := in.reader.ReadRune() + if err != nil { + return 0, err } - fmt.Print(": ") - valueToken := jsonOutValue(in, indent + 1, false) - if valueToken != nil { - panic("Missing value int output JSON map") + if !isWhitespace(r) { + return r, nil } - token, hasToken := in.next() - if !hasToken { - panic("Missing } in output JSON") - } - terminal, isTerminal := token.Value.(TerminalValue) - if isTerminal && terminal == MapEnd { - fmt.Print("\n") - printIndent(indent) - fmt.Print("}") - return - } - in.rewind(token) - fmt.Println(",") } } -func jsonOutValue(in *WalkItemStream, indent int, doIndent bool) WalkValue { - token, hasToken := in.next() - if !hasToken { - panic("Missing JSON token in output") +func (in *JSONIn) requireString(criteria string) { + for _, r := range criteria { + in.require(r) + } +} + +func (in *JSONIn) require(criterion rune) { + r, _, err := in.reader.ReadRune() + if err != nil { + panic("Error while reading required rune: " + err.Error()) + } + if r != criterion { + panic("Required rune not read") + } +} + +func (in *JSONIn) Read() (WalkItem, error) { + item, err := in.read() + if err != nil { + return item, err } - switch v := token.Value.(type) { - case ValueNull: - if doIndent { - printIndent(indent) + return WalkItem { + Value: item.Value, + Path: append([]Atom{}, item.Path...), + }, err +} + +func (in *JSONIn) read() (WalkItem, error) { + restart: + // TODO: Escaping + // TODO: Don't allow trailing commas + // TODO: Proper float parsing with e and stuff + r, err := in.nextNonWsRune() + if err != nil { + return WalkItem {}, err + } + state := in.structure[len(in.structure) - 1] + switch state { + case JSONInMap: + in.popPath() + if r == '}' { + in.structure[len(in.structure) - 1] = JSONInValueEnd + return WalkItem { + Value: []Atom{NewAtomTerminal(MapEnd)}, + Path: in.path, + }, nil + } + if r != '"' { + panic("Expected key, found something else") } - fmt.Printf("null") - return nil - case ValueBool: - if doIndent { - printIndent(indent) + in.path = append(in.path, NewAtomStringTerminal()) + for { + r, _, err = in.reader.ReadRune() + if err != nil { + return WalkItem {}, err + } + if r == '"' { + break + } + if r == '\\' { + r, _, err = in.reader.ReadRune() + if err != nil { + panic("Missing rune after \\") + } + in.path = append(in.path, NewAtomStringRune(r)) + continue + } + in.path = append(in.path, NewAtomStringRune(r)) + } + in.path = append(in.path, NewAtomStringTerminal()) + r, err = in.nextNonWsRune() + if err != nil { + panic("Expected : got: " + err.Error()) + } + if r != ':' { + panic("Expected : after key") } - if token.Value.(ValueBool) { - fmt.Print("true") + r, err = in.nextNonWsRune() + if err != nil { + panic("Missing map value after key: " + err.Error()) + } + case JSONInArray: + if r == ']' { + in.structure[len(in.structure) - 1] = JSONInValueEnd + in.popPath() + return WalkItem { + Value: []Atom{NewAtomTerminal(ArrayEnd)}, + Path: in.path, + }, nil + } + prevIndex := in.path[len(in.path) - 1] + if prevIndex.Typ == AtomNull { + prevIndex.Typ = AtomNumber + prevIndex.data = math.Float64bits(0) + } else if prevIndex.Typ == AtomNumber { + prevIndex.data = math.Float64bits(math.Float64frombits(prevIndex.data) + 1) } else { - fmt.Print("false") + panic("Invalid index in array input") } - return nil - case ValueNumber: - if doIndent { - printIndent(indent) + in.path[len(in.path) - 1] = prevIndex + case JSONInRoot: + case JSONInValueEnd: + in.structure = in.structure[:len(in.structure) - 1] + underState := in.structure[len(in.structure) - 1] + if underState == JSONInRoot { + panic("More input after root JSON object ends") + } else if underState == JSONInMap && r == '}' { + in.structure[len(in.structure) - 1] = JSONInValueEnd + in.popPath() + return WalkItem { + Value: []Atom{NewAtomTerminal(MapEnd)}, + Path: in.path, + }, nil + } else if underState == JSONInArray && r == ']' { + in.structure[len(in.structure) - 1] = JSONInValueEnd + in.popPath() + return WalkItem { + Value: []Atom{NewAtomTerminal(ArrayEnd)}, + Path: in.path, + }, nil } - fmt.Printf("%v", token.Value) - return nil - case ValueString: - if doIndent { - printIndent(indent) + if r != ',' { + panic("Expected , after JSON value, found: \"" + string(r) + "\"") } - fmt.Printf("%q", string(v)) - return nil - case TerminalValue: - switch token.Value.(TerminalValue) { - case ArrayBegin: - if doIndent { - printIndent(indent) + goto restart + default: + panic("Invalid JSONIn state") + } + switch r { + case 'n': + in.requireString("ull") + in.structure = append(in.structure, JSONInValueEnd) + return WalkItem { + Value: []Atom{NewAtomNull()}, + Path: in.path, + }, nil + case 'f': + in.requireString("alse") + in.structure = append(in.structure, JSONInValueEnd) + return WalkItem { + Value: []Atom{NewAtomBool(false)}, + Path: in.path, + }, nil + case 't': + in.requireString("rue") + in.structure = append(in.structure, JSONInValueEnd) + return WalkItem { + Value: []Atom{NewAtomBool(true)}, + Path: in.path, + }, nil + case '"': + value := make([]Atom, 0, 2) + value = append(value, NewAtomStringTerminal()) + for { + r, _, err = in.reader.ReadRune() + if err != nil { + panic("Missing closing terminal in string input: " + err.Error()) + } + if r == '"' { + break + } + if r == '\\' { + r, _, err = in.reader.ReadRune() + if err != nil { + panic("Missing rune after \\") } - jsonOutArray(in, indent) - return nil - case MapBegin: - if doIndent { - printIndent(indent) + value = append(value, NewAtomStringRune(r)) + continue + } + value = append(value, NewAtomStringRune(r)) + } + value = append(value, NewAtomStringTerminal()) + in.structure = append(in.structure, JSONInValueEnd) + return WalkItem { + Value: value, + Path: in.path, + }, nil + case '{': + in.structure = append(in.structure, JSONInMap) + in.path = append(in.path, NewAtomNull()) + return WalkItem { + Value: []Atom{NewAtomTerminal(MapBegin)}, + Path: in.path[:len(in.path) - 1], + }, nil + case '[': + in.structure = append(in.structure, JSONInArray) + in.path = append(in.path, NewAtomNull()) + return WalkItem { + Value: []Atom{NewAtomTerminal(ArrayBegin)}, + Path: in.path[:len(in.path) - 1], + }, nil + } + if isNumberRune(r) { + var builder strings.Builder + builder.WriteRune(r) + for { + r, _, err = in.reader.ReadRune() + if err != nil || !isNumberRune(r) { + break + } + builder.WriteRune(r) + } + in.reader.UnreadRune() + number, parseError := strconv.ParseFloat(builder.String(), 64) + if parseError != nil { + panic("Invalid number") + } + in.structure = append(in.structure, JSONInValueEnd) + return WalkItem { + Value: []Atom{NewAtomNumber(number)}, + Path: in.path, + }, nil + } + panic("Invalid JSON value") +} + +func (in *JSONIn) AssertDone() { + if len(in.structure) != 2 || in.structure[0] != JSONInRoot || in.structure[1] != JSONInValueEnd { + panic("Input ended on incomplete JSON root") + } +} + +type JSONOutStructure int +const ( + JSONOutRoot JSONOutStructure = iota + JSONOutMap + JSONOutArray + JSONOutString + JSONOutValueEnd +) + +type JSONOut struct { + structure []JSONOutStructure +} + +func (out *JSONOut) indent(adjust int) { + fmt.Print(strings.Repeat("\t", len(out.structure) - 1 + adjust)) +} + +func (out *JSONOut) atomOut(key string, atom Atom) { + state := out.structure[len(out.structure) - 1] + switch state { + case JSONOutRoot, JSONOutMap, JSONOutArray: + switch atom.Typ { + case AtomNull, AtomBool, AtomNumber: + out.indent(0) + if state == JSONOutMap { + fmt.Printf("%q: ", key) + } + fmt.Print(atom.String()) + out.structure = append(out.structure, JSONOutValueEnd) + case AtomStringTerminal: + out.indent(0) + if state == JSONOutMap { + fmt.Printf("%q: ", key) + } + fmt.Print("\"") + out.structure = append(out.structure, JSONOutString) + case AtomTerminal: + switch TerminalValue(atom.data) { + case MapBegin: + out.indent(0) + if state == JSONOutMap { + fmt.Printf("%q: ", key) + } + fmt.Print("{\n") + out.structure = append(out.structure, JSONOutMap) + case ArrayBegin: + out.indent(0) + if state == JSONOutMap { + fmt.Printf("%q: ", key) + } + fmt.Print("[\n") + out.structure = append(out.structure, JSONOutArray) + case MapEnd: + out.indent(-1) + if state != JSONOutMap { + panic("Map ended while not inside a map") + } + fmt.Print("}") + out.structure[len(out.structure) - 1] = JSONOutValueEnd + case ArrayEnd: + out.indent(-1) + if state != JSONOutArray { + panic("Array ended while not inside a array") + } + fmt.Print("]") + out.structure[len(out.structure) - 1] = JSONOutValueEnd + default: + panic("Invalid TerminalValue") } - jsonOutMap(in, indent) - return nil default: - return token.Value + panic("Invalid AtomType in root value") + } + case JSONOutValueEnd: + out.structure = out.structure[:len(out.structure) - 1] + underState := out.structure[len(out.structure) - 1] + if underState == JSONOutMap && atom.Typ == AtomTerminal && TerminalValue(atom.data) == MapEnd { + fmt.Print("\n") + out.indent(-1) + fmt.Print("}") + out.structure[len(out.structure) - 1] = JSONOutValueEnd + } else if underState == JSONOutArray && atom.Typ == AtomTerminal && TerminalValue(atom.data) == ArrayEnd { + fmt.Print("\n") + out.indent(-1) + fmt.Print("]") + out.structure[len(out.structure) - 1] = JSONOutValueEnd + } else if underState == JSONOutRoot { + panic("Tried to output JSON after root value has concluded") + } else { + fmt.Print(",\n") + out.atomOut(key, atom) + } + case JSONOutString: + if atom.Typ == AtomStringTerminal { + fmt.Print("\"") + out.structure[len(out.structure) - 1] = JSONOutValueEnd + } else { + fmt.Print(atom.String()) } default: - panic("Invalid WalkValue") + panic("Invalid JSONOutState") } } -func JsonOut(in chan WalkItem) { - stream := WalkItemStream { - channel: in, - rewinds: nil, +func (out *JSONOut) Print(path Path, values []Atom) { + var segment PathSegment + if len(path) > 0 { + segment = path[len(path) - 1] } - if jsonOutValue(&stream, 0, true) != nil { - panic("Invalid output JSON") + segmentString := stringPathSegment(segment) + for _, atom := range values { + out.atomOut(segmentString, atom) + } +} + +func (out *JSONOut) AssertDone() { + if len(out.structure) != 2 || out.structure[0] != JSONOutRoot || out.structure[1] != JSONOutValueEnd { + panic("Program ended with incomplete JSON output") + } +} + +func NewJSONOut() JSONOut { + return JSONOut { + structure: []JSONOutStructure{JSONOutRoot}, } - fmt.Print("\n") } func ConcatData(first []Atom, second []Atom) []Atom { -- cgit v1.2.3