123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655 |
- /*
- * Copyright 2021 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package ast
- import (
- `fmt`
- `github.com/bytedance/sonic/internal/native/types`
- `github.com/bytedance/sonic/internal/rt`
- )
- const (
- _DEFAULT_NODE_CAP int = 8
- _APPEND_GROW_SHIFT = 1
- )
- const (
- _ERR_NOT_FOUND types.ParsingError = 33
- _ERR_UNSUPPORT_TYPE types.ParsingError = 34
- )
- var (
- // ErrNotExist means both key and value doesn't exist
- ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
- // ErrUnsupportType means API on the node is unsupported
- ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
- )
// Parser walks over a JSON document held in a string and produces Nodes.
type Parser struct {
	// p is the read position: the offset into s of the next byte to examine.
	p int
	// s is the JSON source text being parsed.
	s string
	// noLazy makes Parse decode arrays and objects eagerly instead of
	// returning lazy nodes.
	noLazy bool
	// skipValue makes decodeArray and decodeObject skip over each value and
	// store it as a raw node instead of decoding it.
	skipValue bool
	// dbuf is the buffer obtained from types.NewDbuf while number decoding is
	// switched on (see decodeNumber); nil otherwise.
	dbuf *byte
}
- /** Parser Private Methods **/
- func (self *Parser) delim() types.ParsingError {
- n := len(self.s)
- p := self.lspace(self.p)
- /* check for EOF */
- if p >= n {
- return types.ERR_EOF
- }
- /* check for the delimtier */
- if self.s[p] != ':' {
- return types.ERR_INVALID_CHAR
- }
- /* update the read pointer */
- self.p = p + 1
- return 0
- }
- func (self *Parser) object() types.ParsingError {
- n := len(self.s)
- p := self.lspace(self.p)
- /* check for EOF */
- if p >= n {
- return types.ERR_EOF
- }
- /* check for the delimtier */
- if self.s[p] != '{' {
- return types.ERR_INVALID_CHAR
- }
- /* update the read pointer */
- self.p = p + 1
- return 0
- }
- func (self *Parser) array() types.ParsingError {
- n := len(self.s)
- p := self.lspace(self.p)
- /* check for EOF */
- if p >= n {
- return types.ERR_EOF
- }
- /* check for the delimtier */
- if self.s[p] != '[' {
- return types.ERR_INVALID_CHAR
- }
- /* update the read pointer */
- self.p = p + 1
- return 0
- }
- func (self *Parser) lspace(sp int) int {
- ns := len(self.s)
- for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
- return sp
- }
- func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
- sp := self.p
- ns := len(self.s)
- /* check for EOF */
- if self.p = self.lspace(sp); self.p >= ns {
- return Node{}, types.ERR_EOF
- }
- /* check for empty array */
- if self.s[self.p] == ']' {
- self.p++
- return Node{t: types.V_ARRAY}, 0
- }
- /* allocate array space and parse every element */
- for {
- var val Node
- var err types.ParsingError
- if self.skipValue {
- /* skip the value */
- var start int
- if start, err = self.skipFast(); err != 0 {
- return Node{}, err
- }
- if self.p > ns {
- return Node{}, types.ERR_EOF
- }
- t := switchRawType(self.s[start])
- if t == _V_NONE {
- return Node{}, types.ERR_INVALID_CHAR
- }
- val = newRawNode(self.s[start:self.p], t)
- }else{
- /* decode the value */
- if val, err = self.Parse(); err != 0 {
- return Node{}, err
- }
- }
- /* add the value to result */
- ret.Add(val)
- self.p = self.lspace(self.p)
- /* check for EOF */
- if self.p >= ns {
- return Node{}, types.ERR_EOF
- }
- /* check for the next character */
- switch self.s[self.p] {
- case ',' : self.p++
- case ']' : self.p++; return newArray(ret), 0
- default:
- // if val.isLazy() {
- // return newLazyArray(self, ret), 0
- // }
- return Node{}, types.ERR_INVALID_CHAR
- }
- }
- }
- func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
- sp := self.p
- ns := len(self.s)
- /* check for EOF */
- if self.p = self.lspace(sp); self.p >= ns {
- return Node{}, types.ERR_EOF
- }
- /* check for empty object */
- if self.s[self.p] == '}' {
- self.p++
- return Node{t: types.V_OBJECT}, 0
- }
- /* decode each pair */
- for {
- var val Node
- var njs types.JsonState
- var err types.ParsingError
- /* decode the key */
- if njs = self.decodeValue(); njs.Vt != types.V_STRING {
- return Node{}, types.ERR_INVALID_CHAR
- }
- /* extract the key */
- idx := self.p - 1
- key := self.s[njs.Iv:idx]
- /* check for escape sequence */
- if njs.Ep != -1 {
- if key, err = unquote(key); err != 0 {
- return Node{}, err
- }
- }
- /* expect a ':' delimiter */
- if err = self.delim(); err != 0 {
- return Node{}, err
- }
-
- if self.skipValue {
- /* skip the value */
- var start int
- if start, err = self.skipFast(); err != 0 {
- return Node{}, err
- }
- if self.p > ns {
- return Node{}, types.ERR_EOF
- }
- t := switchRawType(self.s[start])
- if t == _V_NONE {
- return Node{}, types.ERR_INVALID_CHAR
- }
- val = newRawNode(self.s[start:self.p], t)
- } else {
- /* decode the value */
- if val, err = self.Parse(); err != 0 {
- return Node{}, err
- }
- }
- /* add the value to result */
- // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
- ret.Add(Pair{Key: key, Value: val})
- self.p = self.lspace(self.p)
- /* check for EOF */
- if self.p >= ns {
- return Node{}, types.ERR_EOF
- }
- /* check for the next character */
- switch self.s[self.p] {
- case ',' : self.p++
- case '}' : self.p++; return newObject(ret), 0
- default:
- // if val.isLazy() {
- // return newLazyObject(self, ret), 0
- // }
- return Node{}, types.ERR_INVALID_CHAR
- }
- }
- }
- func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
- p := self.p - 1
- s := self.s[iv:p]
- /* fast path: no escape sequence */
- if ep == -1 {
- return NewString(s), 0
- }
- /* unquote the string */
- out, err := unquote(s)
- /* check for errors */
- if err != 0 {
- return Node{}, err
- } else {
- return newBytes(rt.Str2Mem(out)), 0
- }
- }
- /** Parser Interface **/
- func (self *Parser) Pos() int {
- return self.p
- }
- func (self *Parser) Parse() (Node, types.ParsingError) {
- switch val := self.decodeValue(); val.Vt {
- case types.V_EOF : return Node{}, types.ERR_EOF
- case types.V_NULL : return nullNode, 0
- case types.V_TRUE : return trueNode, 0
- case types.V_FALSE : return falseNode, 0
- case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
- case types.V_ARRAY:
- if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
- self.p = p + 1
- return Node{t: types.V_ARRAY}, 0
- }
- if self.noLazy {
- return self.decodeArray(new(linkedNodes))
- }
- return newLazyArray(self), 0
- case types.V_OBJECT:
- if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
- self.p = p + 1
- return Node{t: types.V_OBJECT}, 0
- }
- if self.noLazy {
- return self.decodeObject(new(linkedPairs))
- }
- return newLazyObject(self), 0
- case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
- case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
- default : return Node{}, types.ParsingError(-val.Vt)
- }
- }
- func (self *Parser) searchKey(match string) types.ParsingError {
- ns := len(self.s)
- if err := self.object(); err != 0 {
- return err
- }
- /* check for EOF */
- if self.p = self.lspace(self.p); self.p >= ns {
- return types.ERR_EOF
- }
- /* check for empty object */
- if self.s[self.p] == '}' {
- self.p++
- return _ERR_NOT_FOUND
- }
- var njs types.JsonState
- var err types.ParsingError
- /* decode each pair */
- for {
- /* decode the key */
- if njs = self.decodeValue(); njs.Vt != types.V_STRING {
- return types.ERR_INVALID_CHAR
- }
- /* extract the key */
- idx := self.p - 1
- key := self.s[njs.Iv:idx]
- /* check for escape sequence */
- if njs.Ep != -1 {
- if key, err = unquote(key); err != 0 {
- return err
- }
- }
- /* expect a ':' delimiter */
- if err = self.delim(); err != 0 {
- return err
- }
- /* skip value */
- if key != match {
- if _, err = self.skipFast(); err != 0 {
- return err
- }
- } else {
- return 0
- }
- /* check for EOF */
- self.p = self.lspace(self.p)
- if self.p >= ns {
- return types.ERR_EOF
- }
- /* check for the next character */
- switch self.s[self.p] {
- case ',':
- self.p++
- case '}':
- self.p++
- return _ERR_NOT_FOUND
- default:
- return types.ERR_INVALID_CHAR
- }
- }
- }
- func (self *Parser) searchIndex(idx int) types.ParsingError {
- ns := len(self.s)
- if err := self.array(); err != 0 {
- return err
- }
- /* check for EOF */
- if self.p = self.lspace(self.p); self.p >= ns {
- return types.ERR_EOF
- }
- /* check for empty array */
- if self.s[self.p] == ']' {
- self.p++
- return _ERR_NOT_FOUND
- }
- var err types.ParsingError
- /* allocate array space and parse every element */
- for i := 0; i < idx; i++ {
- /* decode the value */
- if _, err = self.skipFast(); err != 0 {
- return err
- }
- /* check for EOF */
- self.p = self.lspace(self.p)
- if self.p >= ns {
- return types.ERR_EOF
- }
- /* check for the next character */
- switch self.s[self.p] {
- case ',':
- self.p++
- case ']':
- self.p++
- return _ERR_NOT_FOUND
- default:
- return types.ERR_INVALID_CHAR
- }
- }
- return 0
- }
- func (self *Node) skipNextNode() *Node {
- if !self.isLazy() {
- return nil
- }
- parser, stack := self.getParserAndArrayStack()
- ret := &stack.v
- sp := parser.p
- ns := len(parser.s)
- /* check for EOF */
- if parser.p = parser.lspace(sp); parser.p >= ns {
- return newSyntaxError(parser.syntaxError(types.ERR_EOF))
- }
- /* check for empty array */
- if parser.s[parser.p] == ']' {
- parser.p++
- self.setArray(ret)
- return nil
- }
- var val Node
- /* skip the value */
- if start, err := parser.skipFast(); err != 0 {
- return newSyntaxError(parser.syntaxError(err))
- } else {
- t := switchRawType(parser.s[start])
- if t == _V_NONE {
- return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
- }
- val = newRawNode(parser.s[start:parser.p], t)
- }
- /* add the value to result */
- ret.Add(val)
- self.l++
- parser.p = parser.lspace(parser.p)
- /* check for EOF */
- if parser.p >= ns {
- return newSyntaxError(parser.syntaxError(types.ERR_EOF))
- }
- /* check for the next character */
- switch parser.s[parser.p] {
- case ',':
- parser.p++
- return ret.At(ret.Len()-1)
- case ']':
- parser.p++
- self.setArray(ret)
- return ret.At(ret.Len()-1)
- default:
- return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
- }
- }
- func (self *Node) skipNextPair() (*Pair) {
- if !self.isLazy() {
- return nil
- }
- parser, stack := self.getParserAndObjectStack()
- ret := &stack.v
- sp := parser.p
- ns := len(parser.s)
- /* check for EOF */
- if parser.p = parser.lspace(sp); parser.p >= ns {
- return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
- }
- /* check for empty object */
- if parser.s[parser.p] == '}' {
- parser.p++
- self.setObject(ret)
- return nil
- }
- /* decode one pair */
- var val Node
- var njs types.JsonState
- var err types.ParsingError
- /* decode the key */
- if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
- return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
- }
- /* extract the key */
- idx := parser.p - 1
- key := parser.s[njs.Iv:idx]
- /* check for escape sequence */
- if njs.Ep != -1 {
- if key, err = unquote(key); err != 0 {
- return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
- }
- }
- /* expect a ':' delimiter */
- if err = parser.delim(); err != 0 {
- return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
- }
- /* skip the value */
- if start, err := parser.skipFast(); err != 0 {
- return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
- } else {
- t := switchRawType(parser.s[start])
- if t == _V_NONE {
- return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
- }
- val = newRawNode(parser.s[start:parser.p], t)
- }
- /* add the value to result */
- ret.Add(Pair{Key: key, Value: val})
- self.l++
- parser.p = parser.lspace(parser.p)
- /* check for EOF */
- if parser.p >= ns {
- return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
- }
- /* check for the next character */
- switch parser.s[parser.p] {
- case ',':
- parser.p++
- return ret.At(ret.Len()-1)
- case '}':
- parser.p++
- self.setObject(ret)
- return ret.At(ret.Len()-1)
- default:
- return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
- }
- }
- /** Parser Factory **/
- // Loads parse all json into interface{}
- func Loads(src string) (int, interface{}, error) {
- ps := &Parser{s: src}
- np, err := ps.Parse()
- /* check for errors */
- if err != 0 {
- return 0, nil, ps.ExportError(err)
- } else {
- x, err := np.Interface()
- if err != nil {
- return 0, nil, err
- }
- return ps.Pos(), x, nil
- }
- }
- // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
- func LoadsUseNumber(src string) (int, interface{}, error) {
- ps := &Parser{s: src}
- np, err := ps.Parse()
- /* check for errors */
- if err != 0 {
- return 0, nil, err
- } else {
- x, err := np.InterfaceUseNumber()
- if err != nil {
- return 0, nil, err
- }
- return ps.Pos(), x, nil
- }
- }
- // NewParser returns pointer of new allocated parser
- func NewParser(src string) *Parser {
- return &Parser{s: src}
- }
- // NewParser returns new allocated parser
- func NewParserObj(src string) Parser {
- return Parser{s: src}
- }
- // decodeNumber controls if parser decodes the number values instead of skip them
- // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser
- // otherwise the memory CANNOT be reused
- func (self *Parser) decodeNumber(decode bool) {
- if !decode && self.dbuf != nil {
- types.FreeDbuf(self.dbuf)
- self.dbuf = nil
- return
- }
- if decode && self.dbuf == nil {
- self.dbuf = types.NewDbuf()
- }
- }
- // ExportError converts types.ParsingError to std Error
- func (self *Parser) ExportError(err types.ParsingError) error {
- if err == _ERR_NOT_FOUND {
- return ErrNotExist
- }
- return fmt.Errorf("%q", SyntaxError{
- Pos : self.p,
- Src : self.s,
- Code: err,
- }.Description())
- }
|