parser.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package ast
  17. import (
  18. `fmt`
  19. `github.com/bytedance/sonic/internal/native/types`
  20. `github.com/bytedance/sonic/internal/rt`
  21. )
  22. const (
  23. _DEFAULT_NODE_CAP int = 8
  24. _APPEND_GROW_SHIFT = 1
  25. )
  26. const (
  27. _ERR_NOT_FOUND types.ParsingError = 33
  28. _ERR_UNSUPPORT_TYPE types.ParsingError = 34
  29. )
  30. var (
  31. // ErrNotExist means both key and value doesn't exist
  32. ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
  33. // ErrUnsupportType means API on the node is unsupported
  34. ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
  35. )
  36. type Parser struct {
  37. p int
  38. s string
  39. noLazy bool
  40. skipValue bool
  41. dbuf *byte
  42. }
  43. /** Parser Private Methods **/
  44. func (self *Parser) delim() types.ParsingError {
  45. n := len(self.s)
  46. p := self.lspace(self.p)
  47. /* check for EOF */
  48. if p >= n {
  49. return types.ERR_EOF
  50. }
  51. /* check for the delimtier */
  52. if self.s[p] != ':' {
  53. return types.ERR_INVALID_CHAR
  54. }
  55. /* update the read pointer */
  56. self.p = p + 1
  57. return 0
  58. }
  59. func (self *Parser) object() types.ParsingError {
  60. n := len(self.s)
  61. p := self.lspace(self.p)
  62. /* check for EOF */
  63. if p >= n {
  64. return types.ERR_EOF
  65. }
  66. /* check for the delimtier */
  67. if self.s[p] != '{' {
  68. return types.ERR_INVALID_CHAR
  69. }
  70. /* update the read pointer */
  71. self.p = p + 1
  72. return 0
  73. }
  74. func (self *Parser) array() types.ParsingError {
  75. n := len(self.s)
  76. p := self.lspace(self.p)
  77. /* check for EOF */
  78. if p >= n {
  79. return types.ERR_EOF
  80. }
  81. /* check for the delimtier */
  82. if self.s[p] != '[' {
  83. return types.ERR_INVALID_CHAR
  84. }
  85. /* update the read pointer */
  86. self.p = p + 1
  87. return 0
  88. }
  89. func (self *Parser) lspace(sp int) int {
  90. ns := len(self.s)
  91. for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
  92. return sp
  93. }
  94. func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
  95. sp := self.p
  96. ns := len(self.s)
  97. /* check for EOF */
  98. if self.p = self.lspace(sp); self.p >= ns {
  99. return Node{}, types.ERR_EOF
  100. }
  101. /* check for empty array */
  102. if self.s[self.p] == ']' {
  103. self.p++
  104. return Node{t: types.V_ARRAY}, 0
  105. }
  106. /* allocate array space and parse every element */
  107. for {
  108. var val Node
  109. var err types.ParsingError
  110. if self.skipValue {
  111. /* skip the value */
  112. var start int
  113. if start, err = self.skipFast(); err != 0 {
  114. return Node{}, err
  115. }
  116. if self.p > ns {
  117. return Node{}, types.ERR_EOF
  118. }
  119. t := switchRawType(self.s[start])
  120. if t == _V_NONE {
  121. return Node{}, types.ERR_INVALID_CHAR
  122. }
  123. val = newRawNode(self.s[start:self.p], t)
  124. }else{
  125. /* decode the value */
  126. if val, err = self.Parse(); err != 0 {
  127. return Node{}, err
  128. }
  129. }
  130. /* add the value to result */
  131. ret.Add(val)
  132. self.p = self.lspace(self.p)
  133. /* check for EOF */
  134. if self.p >= ns {
  135. return Node{}, types.ERR_EOF
  136. }
  137. /* check for the next character */
  138. switch self.s[self.p] {
  139. case ',' : self.p++
  140. case ']' : self.p++; return newArray(ret), 0
  141. default:
  142. // if val.isLazy() {
  143. // return newLazyArray(self, ret), 0
  144. // }
  145. return Node{}, types.ERR_INVALID_CHAR
  146. }
  147. }
  148. }
  149. func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
  150. sp := self.p
  151. ns := len(self.s)
  152. /* check for EOF */
  153. if self.p = self.lspace(sp); self.p >= ns {
  154. return Node{}, types.ERR_EOF
  155. }
  156. /* check for empty object */
  157. if self.s[self.p] == '}' {
  158. self.p++
  159. return Node{t: types.V_OBJECT}, 0
  160. }
  161. /* decode each pair */
  162. for {
  163. var val Node
  164. var njs types.JsonState
  165. var err types.ParsingError
  166. /* decode the key */
  167. if njs = self.decodeValue(); njs.Vt != types.V_STRING {
  168. return Node{}, types.ERR_INVALID_CHAR
  169. }
  170. /* extract the key */
  171. idx := self.p - 1
  172. key := self.s[njs.Iv:idx]
  173. /* check for escape sequence */
  174. if njs.Ep != -1 {
  175. if key, err = unquote(key); err != 0 {
  176. return Node{}, err
  177. }
  178. }
  179. /* expect a ':' delimiter */
  180. if err = self.delim(); err != 0 {
  181. return Node{}, err
  182. }
  183. if self.skipValue {
  184. /* skip the value */
  185. var start int
  186. if start, err = self.skipFast(); err != 0 {
  187. return Node{}, err
  188. }
  189. if self.p > ns {
  190. return Node{}, types.ERR_EOF
  191. }
  192. t := switchRawType(self.s[start])
  193. if t == _V_NONE {
  194. return Node{}, types.ERR_INVALID_CHAR
  195. }
  196. val = newRawNode(self.s[start:self.p], t)
  197. } else {
  198. /* decode the value */
  199. if val, err = self.Parse(); err != 0 {
  200. return Node{}, err
  201. }
  202. }
  203. /* add the value to result */
  204. // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
  205. ret.Add(Pair{Key: key, Value: val})
  206. self.p = self.lspace(self.p)
  207. /* check for EOF */
  208. if self.p >= ns {
  209. return Node{}, types.ERR_EOF
  210. }
  211. /* check for the next character */
  212. switch self.s[self.p] {
  213. case ',' : self.p++
  214. case '}' : self.p++; return newObject(ret), 0
  215. default:
  216. // if val.isLazy() {
  217. // return newLazyObject(self, ret), 0
  218. // }
  219. return Node{}, types.ERR_INVALID_CHAR
  220. }
  221. }
  222. }
  223. func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
  224. p := self.p - 1
  225. s := self.s[iv:p]
  226. /* fast path: no escape sequence */
  227. if ep == -1 {
  228. return NewString(s), 0
  229. }
  230. /* unquote the string */
  231. out, err := unquote(s)
  232. /* check for errors */
  233. if err != 0 {
  234. return Node{}, err
  235. } else {
  236. return newBytes(rt.Str2Mem(out)), 0
  237. }
  238. }
  239. /** Parser Interface **/
  240. func (self *Parser) Pos() int {
  241. return self.p
  242. }
  243. func (self *Parser) Parse() (Node, types.ParsingError) {
  244. switch val := self.decodeValue(); val.Vt {
  245. case types.V_EOF : return Node{}, types.ERR_EOF
  246. case types.V_NULL : return nullNode, 0
  247. case types.V_TRUE : return trueNode, 0
  248. case types.V_FALSE : return falseNode, 0
  249. case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
  250. case types.V_ARRAY:
  251. if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
  252. self.p = p + 1
  253. return Node{t: types.V_ARRAY}, 0
  254. }
  255. if self.noLazy {
  256. return self.decodeArray(new(linkedNodes))
  257. }
  258. return newLazyArray(self), 0
  259. case types.V_OBJECT:
  260. if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
  261. self.p = p + 1
  262. return Node{t: types.V_OBJECT}, 0
  263. }
  264. if self.noLazy {
  265. return self.decodeObject(new(linkedPairs))
  266. }
  267. return newLazyObject(self), 0
  268. case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
  269. case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
  270. default : return Node{}, types.ParsingError(-val.Vt)
  271. }
  272. }
  273. func (self *Parser) searchKey(match string) types.ParsingError {
  274. ns := len(self.s)
  275. if err := self.object(); err != 0 {
  276. return err
  277. }
  278. /* check for EOF */
  279. if self.p = self.lspace(self.p); self.p >= ns {
  280. return types.ERR_EOF
  281. }
  282. /* check for empty object */
  283. if self.s[self.p] == '}' {
  284. self.p++
  285. return _ERR_NOT_FOUND
  286. }
  287. var njs types.JsonState
  288. var err types.ParsingError
  289. /* decode each pair */
  290. for {
  291. /* decode the key */
  292. if njs = self.decodeValue(); njs.Vt != types.V_STRING {
  293. return types.ERR_INVALID_CHAR
  294. }
  295. /* extract the key */
  296. idx := self.p - 1
  297. key := self.s[njs.Iv:idx]
  298. /* check for escape sequence */
  299. if njs.Ep != -1 {
  300. if key, err = unquote(key); err != 0 {
  301. return err
  302. }
  303. }
  304. /* expect a ':' delimiter */
  305. if err = self.delim(); err != 0 {
  306. return err
  307. }
  308. /* skip value */
  309. if key != match {
  310. if _, err = self.skipFast(); err != 0 {
  311. return err
  312. }
  313. } else {
  314. return 0
  315. }
  316. /* check for EOF */
  317. self.p = self.lspace(self.p)
  318. if self.p >= ns {
  319. return types.ERR_EOF
  320. }
  321. /* check for the next character */
  322. switch self.s[self.p] {
  323. case ',':
  324. self.p++
  325. case '}':
  326. self.p++
  327. return _ERR_NOT_FOUND
  328. default:
  329. return types.ERR_INVALID_CHAR
  330. }
  331. }
  332. }
  333. func (self *Parser) searchIndex(idx int) types.ParsingError {
  334. ns := len(self.s)
  335. if err := self.array(); err != 0 {
  336. return err
  337. }
  338. /* check for EOF */
  339. if self.p = self.lspace(self.p); self.p >= ns {
  340. return types.ERR_EOF
  341. }
  342. /* check for empty array */
  343. if self.s[self.p] == ']' {
  344. self.p++
  345. return _ERR_NOT_FOUND
  346. }
  347. var err types.ParsingError
  348. /* allocate array space and parse every element */
  349. for i := 0; i < idx; i++ {
  350. /* decode the value */
  351. if _, err = self.skipFast(); err != 0 {
  352. return err
  353. }
  354. /* check for EOF */
  355. self.p = self.lspace(self.p)
  356. if self.p >= ns {
  357. return types.ERR_EOF
  358. }
  359. /* check for the next character */
  360. switch self.s[self.p] {
  361. case ',':
  362. self.p++
  363. case ']':
  364. self.p++
  365. return _ERR_NOT_FOUND
  366. default:
  367. return types.ERR_INVALID_CHAR
  368. }
  369. }
  370. return 0
  371. }
  372. func (self *Node) skipNextNode() *Node {
  373. if !self.isLazy() {
  374. return nil
  375. }
  376. parser, stack := self.getParserAndArrayStack()
  377. ret := &stack.v
  378. sp := parser.p
  379. ns := len(parser.s)
  380. /* check for EOF */
  381. if parser.p = parser.lspace(sp); parser.p >= ns {
  382. return newSyntaxError(parser.syntaxError(types.ERR_EOF))
  383. }
  384. /* check for empty array */
  385. if parser.s[parser.p] == ']' {
  386. parser.p++
  387. self.setArray(ret)
  388. return nil
  389. }
  390. var val Node
  391. /* skip the value */
  392. if start, err := parser.skipFast(); err != 0 {
  393. return newSyntaxError(parser.syntaxError(err))
  394. } else {
  395. t := switchRawType(parser.s[start])
  396. if t == _V_NONE {
  397. return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
  398. }
  399. val = newRawNode(parser.s[start:parser.p], t)
  400. }
  401. /* add the value to result */
  402. ret.Add(val)
  403. self.l++
  404. parser.p = parser.lspace(parser.p)
  405. /* check for EOF */
  406. if parser.p >= ns {
  407. return newSyntaxError(parser.syntaxError(types.ERR_EOF))
  408. }
  409. /* check for the next character */
  410. switch parser.s[parser.p] {
  411. case ',':
  412. parser.p++
  413. return ret.At(ret.Len()-1)
  414. case ']':
  415. parser.p++
  416. self.setArray(ret)
  417. return ret.At(ret.Len()-1)
  418. default:
  419. return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
  420. }
  421. }
  422. func (self *Node) skipNextPair() (*Pair) {
  423. if !self.isLazy() {
  424. return nil
  425. }
  426. parser, stack := self.getParserAndObjectStack()
  427. ret := &stack.v
  428. sp := parser.p
  429. ns := len(parser.s)
  430. /* check for EOF */
  431. if parser.p = parser.lspace(sp); parser.p >= ns {
  432. return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
  433. }
  434. /* check for empty object */
  435. if parser.s[parser.p] == '}' {
  436. parser.p++
  437. self.setObject(ret)
  438. return nil
  439. }
  440. /* decode one pair */
  441. var val Node
  442. var njs types.JsonState
  443. var err types.ParsingError
  444. /* decode the key */
  445. if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
  446. return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
  447. }
  448. /* extract the key */
  449. idx := parser.p - 1
  450. key := parser.s[njs.Iv:idx]
  451. /* check for escape sequence */
  452. if njs.Ep != -1 {
  453. if key, err = unquote(key); err != 0 {
  454. return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
  455. }
  456. }
  457. /* expect a ':' delimiter */
  458. if err = parser.delim(); err != 0 {
  459. return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
  460. }
  461. /* skip the value */
  462. if start, err := parser.skipFast(); err != 0 {
  463. return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
  464. } else {
  465. t := switchRawType(parser.s[start])
  466. if t == _V_NONE {
  467. return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
  468. }
  469. val = newRawNode(parser.s[start:parser.p], t)
  470. }
  471. /* add the value to result */
  472. ret.Add(Pair{Key: key, Value: val})
  473. self.l++
  474. parser.p = parser.lspace(parser.p)
  475. /* check for EOF */
  476. if parser.p >= ns {
  477. return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
  478. }
  479. /* check for the next character */
  480. switch parser.s[parser.p] {
  481. case ',':
  482. parser.p++
  483. return ret.At(ret.Len()-1)
  484. case '}':
  485. parser.p++
  486. self.setObject(ret)
  487. return ret.At(ret.Len()-1)
  488. default:
  489. return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
  490. }
  491. }
  492. /** Parser Factory **/
  493. // Loads parse all json into interface{}
  494. func Loads(src string) (int, interface{}, error) {
  495. ps := &Parser{s: src}
  496. np, err := ps.Parse()
  497. /* check for errors */
  498. if err != 0 {
  499. return 0, nil, ps.ExportError(err)
  500. } else {
  501. x, err := np.Interface()
  502. if err != nil {
  503. return 0, nil, err
  504. }
  505. return ps.Pos(), x, nil
  506. }
  507. }
  508. // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
  509. func LoadsUseNumber(src string) (int, interface{}, error) {
  510. ps := &Parser{s: src}
  511. np, err := ps.Parse()
  512. /* check for errors */
  513. if err != 0 {
  514. return 0, nil, err
  515. } else {
  516. x, err := np.InterfaceUseNumber()
  517. if err != nil {
  518. return 0, nil, err
  519. }
  520. return ps.Pos(), x, nil
  521. }
  522. }
  523. // NewParser returns pointer of new allocated parser
  524. func NewParser(src string) *Parser {
  525. return &Parser{s: src}
  526. }
  527. // NewParser returns new allocated parser
  528. func NewParserObj(src string) Parser {
  529. return Parser{s: src}
  530. }
  531. // decodeNumber controls if parser decodes the number values instead of skip them
  532. // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser
  533. // otherwise the memory CANNOT be reused
  534. func (self *Parser) decodeNumber(decode bool) {
  535. if !decode && self.dbuf != nil {
  536. types.FreeDbuf(self.dbuf)
  537. self.dbuf = nil
  538. return
  539. }
  540. if decode && self.dbuf == nil {
  541. self.dbuf = types.NewDbuf()
  542. }
  543. }
  544. // ExportError converts types.ParsingError to std Error
  545. func (self *Parser) ExportError(err types.ParsingError) error {
  546. if err == _ERR_NOT_FOUND {
  547. return ErrNotExist
  548. }
  549. return fmt.Errorf("%q", SyntaxError{
  550. Pos : self.p,
  551. Src : self.s,
  552. Code: err,
  553. }.Description())
  554. }