From 11bfa042040803061fa8cf04ab00d0e815ebafad Mon Sep 17 00:00:00 2001 From: Charlie Stanton Date: Tue, 25 Apr 2023 17:27:47 +0100 Subject: Completely rewrites the JSON parser to make more extensive use of slices and decrease the number mallocs --- walk/read.go | 452 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 316 insertions(+), 136 deletions(-) (limited to 'walk/read.go') diff --git a/walk/read.go b/walk/read.go index 6f3acfe..58dfcae 100644 --- a/walk/read.go +++ b/walk/read.go @@ -5,13 +5,41 @@ import ( "math" "strings" "strconv" + "fmt" ) +type ReadAction interface { + String() string +} + +type ActionReadValue struct {} +func (_ ActionReadValue) String() string { + return "read" +} + +type ActionAppendPath struct { + atoms []Atom +} +func (action ActionAppendPath) String() string { + return fmt.Sprintf("append(%v)", action.atoms) +} + +type ActionPopPath struct {} +func (_ ActionPopPath) String() string { + return "pop" +} + +type ActionIncrementPath struct {} +func (_ ActionIncrementPath) String() string { + return "increment" +} + type JSONInStructure int const ( JSONInRoot JSONInStructure = iota JSONInMap JSONInArray + JSONInString JSONInValueEnd ) @@ -19,6 +47,11 @@ type JSONIn struct { path []Atom reader *bufio.Reader structure []JSONInStructure + readBuffer []Atom + readIndex int + readBufferCapacity int + actionBuffer []ReadAction + actionIndex int } func NewJSONIn(reader *bufio.Reader) JSONIn { @@ -26,6 +59,11 @@ func NewJSONIn(reader *bufio.Reader) JSONIn { path: make([]Atom, 0, 256), reader: reader, structure: []JSONInStructure{JSONInRoot}, + readBuffer: make([]Atom, 0, 256), + readIndex: 0, + readBufferCapacity: 256, + actionBuffer: make([]ReadAction, 0, 256), + actionIndex: 0, } } @@ -127,159 +165,301 @@ func (in *JSONIn) readString(out []Atom) []Atom { return out } -func (in *JSONIn) Read() (WalkItem, error) { - restart: - // TODO: Escaping - // TODO: Don't allow trailing commas - // TODO: Proper float parsing with e and stuff - r, err := in.nextNonWsRune() - if err != nil { - return WalkItem {}, err +// Returns the first full value of a list of atoms and also a boolean to indicate if there isn't a value at the beginning +func firstValue(atoms []Atom) ([]Atom, bool) { + if len(atoms) == 0 { + return nil, true + } + if atoms[0].Typ != AtomStringTerminal { + return atoms[0:1], false + } + i := 1 + for { + if i == len(atoms) { + return nil, true + } + if atoms[i].Typ == AtomStringTerminal { + return atoms[0:i+1], false + } + i++ } - state := in.structure[len(in.structure) - 1] - switch state { - case JSONInMap: - in.popPath() - if r == '}' { - in.structure[len(in.structure) - 1] = JSONInValueEnd +} + +func (in *JSONIn) Read() (WalkItem, error) { + actionLoop: for { + if in.actionIndex == len(in.actionBuffer) { + in.actionIndex = 0 + in.readIndex = 0 + in.actionBuffer = in.actionBuffer[:0] + in.readBuffer = in.readBuffer[:0] + structure, err := in.fillReadBuffer(in.structure) + in.structure = structure + if len(in.actionBuffer) == 0 { + return WalkItem{}, err + } + } + action := in.actionBuffer[in.actionIndex] + switch a := action.(type) { + case ActionReadValue: + value, incomplete := firstValue(in.readBuffer[in.readIndex:]) + if incomplete { + if in.readIndex == 0 { + newReadBuffer := make([]Atom, len(in.readBuffer), in.readBufferCapacity * 2) + in.readBufferCapacity *= 2 + copy(newReadBuffer, in.readBuffer) + in.readBuffer = newReadBuffer + structure, _ := in.fillReadBuffer(in.structure) + in.structure = structure + continue actionLoop + } + copy(in.readBuffer, in.readBuffer[in.readIndex:]) + in.readBuffer = in.readBuffer[:len(in.readBuffer) - in.readIndex] + in.readIndex = 0 + copy(in.actionBuffer, in.actionBuffer[in.actionIndex:]) + in.actionBuffer = in.actionBuffer[:len(in.actionBuffer) - in.actionIndex] + in.actionIndex = 0 + structure, _ := in.fillReadBuffer(in.structure) + in.structure = structure + continue actionLoop + } + in.readIndex += len(value) + in.actionIndex++ return WalkItem { - Value: []Atom{NewAtomTerminal(MapEnd)}, + Value: value, Path: in.path, }, nil + case ActionAppendPath: + in.path = append(in.path, a.atoms...) + case ActionPopPath: + in.popPath() + case ActionIncrementPath: + prevIndex := in.path[len(in.path) - 1] + if prevIndex.Typ == AtomNull { + prevIndex.Typ = AtomNumber + prevIndex.data = math.Float64bits(0) + } else if prevIndex.Typ == AtomNumber { + prevIndex.data = math.Float64bits(math.Float64frombits(prevIndex.data) + 1) + } else { + panic("Invalid index in array input. Type: " + fmt.Sprintf("%v", prevIndex.Typ)) + } + in.path[len(in.path) - 1] = prevIndex + default: + panic("Invalid ReadAction") + } + in.actionIndex++ + } +} + +func (in *JSONIn) AssertDone() { + if len(in.structure) != 1 || in.structure[0] != JSONInRoot { + panic("Input ended on incomplete JSON root") + } +} + +func (in *JSONIn) pushReadBuffer(atom Atom) bool { + in.readBuffer = append(in.readBuffer, atom) + return len(in.readBuffer) == in.readBufferCapacity +} + +func (in *JSONIn) pushActionBuffer(action ReadAction) { + in.actionBuffer = append(in.actionBuffer, action) +} + +func (in *JSONIn) fillReadBuffer(structure []JSONInStructure) ([]JSONInStructure, error) { + valueStart: { + state := structure[len(structure) - 1] + switch state { + case JSONInString: + structure = structure[:len(structure) - 1] + goto string + case JSONInValueEnd: + structure = structure[:len(structure) - 1] + goto valueEnd + case JSONInMap: + goto mapValue + case JSONInArray: + goto arrayValue + case JSONInRoot: + goto value + } + } + value: { + r, err := in.nextNonWsRune() + if err != nil { + return structure, err + } + switch r { + case 'n': + in.requireString("ull") + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomNull()) { + return append(structure, JSONInValueEnd), nil + } + goto valueEnd + case 'f': + in.requireString("alse") + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomBool(false)) { + return append(structure, JSONInValueEnd), nil + } + goto valueEnd + case 't': + in.requireString("rue") + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomBool(true)) { + return append(structure, JSONInValueEnd), nil + } + goto valueEnd + case '"': + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomStringTerminal()) { + return append(structure, JSONInString), nil + } + goto string + case '{': + structure = append(structure, JSONInMap) + in.pushActionBuffer(ActionReadValue{}) + in.pushActionBuffer(ActionAppendPath {[]Atom{NewAtomNull()}}) + if in.pushReadBuffer(NewAtomTerminal(MapBegin)) { + return structure, nil + } + goto mapValue + case '[': + structure = append(structure, JSONInArray) + in.pushActionBuffer(ActionReadValue{}) + in.pushActionBuffer(ActionAppendPath {[]Atom{NewAtomNull()}}) + if in.pushReadBuffer(NewAtomTerminal(ArrayBegin)) { + return structure, nil + } + goto arrayValue + } + if isNumberRune(r) { + var builder strings.Builder + builder.WriteRune(r) + for { + r, _, err = in.reader.ReadRune() + if err != nil { + break + } + if !isNumberRune(r) { + in.reader.UnreadRune() + break + } + builder.WriteRune(r) } - if r != '"' { - panic("Expected key, found something else") + number, parseError := strconv.ParseFloat(builder.String(), 64) + if parseError != nil { + panic("Invalid number") } - in.path = in.readString(in.path) - r, err = in.nextNonWsRune() - if err != nil { - panic("Expected : got: " + err.Error()) + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomNumber(number)) { + return append(structure, JSONInValueEnd), nil } - if r != ':' { - panic("Expected : after key") + goto valueEnd + } + panic("Invalid JSON value starting with: " + string(r)) + } + string: { + r, _, err := in.reader.ReadRune() + if err != nil { + panic("Missing closing terminal in string input: " + err.Error()) + } + if r == '"' { + if in.pushReadBuffer(NewAtomStringTerminal()) { + return append(structure, JSONInValueEnd), nil } - r, err = in.nextNonWsRune() + goto valueEnd + } + if r == '\\' { + r, _, err = in.reader.ReadRune() if err != nil { - panic("Missing map value after key: " + err.Error()) - } - case JSONInArray: - if r == ']' { - in.structure[len(in.structure) - 1] = JSONInValueEnd - in.popPath() - return WalkItem { - Value: []Atom{NewAtomTerminal(ArrayEnd)}, - Path: in.path, - }, nil + panic("Missing rune after \\") } - prevIndex := in.path[len(in.path) - 1] - if prevIndex.Typ == AtomNull { - prevIndex.Typ = AtomNumber - prevIndex.data = math.Float64bits(0) - } else if prevIndex.Typ == AtomNumber { - prevIndex.data = math.Float64bits(math.Float64frombits(prevIndex.data) + 1) - } else { - panic("Invalid index in array input") + if in.pushReadBuffer(NewAtomStringRune(r)) { + return append(structure, JSONInString), nil } - in.path[len(in.path) - 1] = prevIndex - case JSONInRoot: - case JSONInValueEnd: - in.structure = in.structure[:len(in.structure) - 1] - underState := in.structure[len(in.structure) - 1] - if underState == JSONInRoot { - panic("More input after root JSON object ends") - } else if underState == JSONInMap && r == '}' { - in.structure[len(in.structure) - 1] = JSONInValueEnd - in.popPath() - return WalkItem { - Value: []Atom{NewAtomTerminal(MapEnd)}, - Path: in.path, - }, nil - } else if underState == JSONInArray && r == ']' { - in.structure[len(in.structure) - 1] = JSONInValueEnd - in.popPath() - return WalkItem { - Value: []Atom{NewAtomTerminal(ArrayEnd)}, - Path: in.path, - }, nil + goto string + } + if in.pushReadBuffer(NewAtomStringRune(r)) { + return append(structure, JSONInString), nil + } + goto string + } + valueEnd: { + r, err := in.nextNonWsRune() + if err != nil { + return structure, err + } + underState := structure[len(structure) - 1] + if underState == JSONInRoot { + panic("More input after root JSON object ends") + } else if underState == JSONInMap && r == '}' { + structure = structure[:len(structure) - 1] + in.pushActionBuffer(ActionPopPath{}) + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomTerminal(MapEnd)) { + return append(structure, JSONInValueEnd), nil } - if r != ',' { - panic("Expected , after JSON value, found: \"" + string(r) + "\"") + goto valueEnd + } else if underState == JSONInArray && r == ']' { + structure = structure[:len(structure) - 1] + in.pushActionBuffer(ActionPopPath{}) + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomTerminal(ArrayEnd)) { + return append(structure, JSONInValueEnd), nil } - goto restart - default: - panic("Invalid JSONIn state") - } - switch r { - case 'n': - in.requireString("ull") - in.structure = append(in.structure, JSONInValueEnd) - return WalkItem { - Value: []Atom{NewAtomNull()}, - Path: in.path, - }, nil - case 'f': - in.requireString("alse") - in.structure = append(in.structure, JSONInValueEnd) - return WalkItem { - Value: []Atom{NewAtomBool(false)}, - Path: in.path, - }, nil - case 't': - in.requireString("rue") - in.structure = append(in.structure, JSONInValueEnd) - return WalkItem { - Value: []Atom{NewAtomBool(true)}, - Path: in.path, - }, nil - case '"': - value := make([]Atom, 0, 64) - value = in.readString(value) - in.structure = append(in.structure, JSONInValueEnd) - return WalkItem { - Value: value, - Path: in.path, - }, nil - case '{': - in.structure = append(in.structure, JSONInMap) - in.path = append(in.path, NewAtomNull()) - return WalkItem { - Value: []Atom{NewAtomTerminal(MapBegin)}, - Path: in.path[:len(in.path) - 1], - }, nil - case '[': - in.structure = append(in.structure, JSONInArray) - in.path = append(in.path, NewAtomNull()) - return WalkItem { - Value: []Atom{NewAtomTerminal(ArrayBegin)}, - Path: in.path[:len(in.path) - 1], - }, nil + goto valueEnd + } + if r != ',' { + panic("Expected , after JSON value, found: \"" + string(r) + "\"") + } + goto valueStart } - if isNumberRune(r) { - var builder strings.Builder - builder.WriteRune(r) - for { - r, _, err = in.reader.ReadRune() - if err != nil || !isNumberRune(r) { - break + mapValue: { + in.pushActionBuffer(ActionPopPath{}) + r, err := in.nextNonWsRune() + if err != nil { + panic("Missing value inside object") + } + if r == '}' { + structure = structure[:len(structure) - 1] + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomTerminal(MapEnd)) { + return append(structure, JSONInValueEnd), nil } - builder.WriteRune(r) + goto valueEnd } - in.reader.UnreadRune() - number, parseError := strconv.ParseFloat(builder.String(), 64) - if parseError != nil { - panic("Invalid number") + if r != '"' { + panic("Expected key found something else") } - in.structure = append(in.structure, JSONInValueEnd) - return WalkItem { - Value: []Atom{NewAtomNumber(number)}, - Path: in.path, - }, nil + var keyAtoms []Atom + keyAtoms = in.readString(keyAtoms) + in.pushActionBuffer(ActionAppendPath {keyAtoms}) + r, err = in.nextNonWsRune() + if err != nil { + panic("Expected : got: " + err.Error()) + } + if r != ':' { + panic("Expected : after key") + } + goto value } - panic("Invalid JSON value") -} - -func (in *JSONIn) AssertDone() { - if len(in.structure) != 2 || in.structure[0] != JSONInRoot || in.structure[1] != JSONInValueEnd { - panic("Input ended on incomplete JSON root") + arrayValue: { + r, err := in.nextNonWsRune() + if err != nil { + panic("Missing value inside array") + } + if r == ']' { + structure = structure[:len(structure) - 1] + in.pushActionBuffer(ActionPopPath{}) + in.pushActionBuffer(ActionReadValue{}) + if in.pushReadBuffer(NewAtomTerminal(ArrayEnd)) { + return append(structure, JSONInValueEnd), nil + } + goto valueEnd + } + in.reader.UnreadRune() + in.pushActionBuffer(ActionIncrementPath{}) + goto value } } -- cgit v1.2.3