decode.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. /*
  2. * Copyright 2022 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package ast
  17. import (
  18. `encoding/base64`
  19. `runtime`
  20. `strconv`
  21. `unsafe`
  22. `github.com/bytedance/sonic/internal/native/types`
  23. `github.com/bytedance/sonic/internal/rt`
  24. )
  25. const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')
  26. const (
  27. bytesNull = "null"
  28. bytesTrue = "true"
  29. bytesFalse = "false"
  30. bytesObject = "{}"
  31. bytesArray = "[]"
  32. )
  33. func isSpace(c byte) bool {
  34. return (int(1<<c) & _blankCharsMask) != 0
  35. }
  36. //go:nocheckptr
  37. func skipBlank(src string, pos int) int {
  38. se := uintptr(rt.IndexChar(src, len(src)))
  39. sp := uintptr(rt.IndexChar(src, pos))
  40. for sp < se {
  41. if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
  42. break
  43. }
  44. sp += 1
  45. }
  46. if sp >= se {
  47. return -int(types.ERR_EOF)
  48. }
  49. runtime.KeepAlive(src)
  50. return int(sp - uintptr(rt.IndexChar(src, 0)))
  51. }
  52. func decodeNull(src string, pos int) (ret int) {
  53. ret = pos + 4
  54. if ret > len(src) {
  55. return -int(types.ERR_EOF)
  56. }
  57. if src[pos:ret] == bytesNull {
  58. return ret
  59. } else {
  60. return -int(types.ERR_INVALID_CHAR)
  61. }
  62. }
  63. func decodeTrue(src string, pos int) (ret int) {
  64. ret = pos + 4
  65. if ret > len(src) {
  66. return -int(types.ERR_EOF)
  67. }
  68. if src[pos:ret] == bytesTrue {
  69. return ret
  70. } else {
  71. return -int(types.ERR_INVALID_CHAR)
  72. }
  73. }
  74. func decodeFalse(src string, pos int) (ret int) {
  75. ret = pos + 5
  76. if ret > len(src) {
  77. return -int(types.ERR_EOF)
  78. }
  79. if src[pos:ret] == bytesFalse {
  80. return ret
  81. }
  82. return -int(types.ERR_INVALID_CHAR)
  83. }
  84. //go:nocheckptr
  85. func decodeString(src string, pos int) (ret int, v string) {
  86. ret, ep := skipString(src, pos)
  87. if ep == -1 {
  88. (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
  89. (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
  90. return ret, v
  91. }
  92. vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
  93. if !ok {
  94. return -int(types.ERR_INVALID_CHAR), ""
  95. }
  96. runtime.KeepAlive(src)
  97. return ret, rt.Mem2Str(vv)
  98. }
  99. func decodeBinary(src string, pos int) (ret int, v []byte) {
  100. var vv string
  101. ret, vv = decodeString(src, pos)
  102. if ret < 0 {
  103. return ret, nil
  104. }
  105. var err error
  106. v, err = base64.StdEncoding.DecodeString(vv)
  107. if err != nil {
  108. return -int(types.ERR_INVALID_CHAR), nil
  109. }
  110. return ret, v
  111. }
  112. func isDigit(c byte) bool {
  113. return c >= '0' && c <= '9'
  114. }
  115. //go:nocheckptr
  116. func decodeInt64(src string, pos int) (ret int, v int64, err error) {
  117. sp := uintptr(rt.IndexChar(src, pos))
  118. ss := uintptr(sp)
  119. se := uintptr(rt.IndexChar(src, len(src)))
  120. if uintptr(sp) >= se {
  121. return -int(types.ERR_EOF), 0, nil
  122. }
  123. if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
  124. sp += 1
  125. }
  126. if sp == se {
  127. return -int(types.ERR_EOF), 0, nil
  128. }
  129. for ; sp < se; sp += uintptr(1) {
  130. if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
  131. break
  132. }
  133. }
  134. if sp < se {
  135. if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
  136. return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
  137. }
  138. }
  139. var vv string
  140. ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  141. (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
  142. (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
  143. v, err = strconv.ParseInt(vv, 10, 64)
  144. if err != nil {
  145. //NOTICE: allow overflow here
  146. if err.(*strconv.NumError).Err == strconv.ErrRange {
  147. return ret, 0, err
  148. }
  149. return -int(types.ERR_INVALID_CHAR), 0, err
  150. }
  151. runtime.KeepAlive(src)
  152. return ret, v, nil
  153. }
  154. func isNumberChars(c byte) bool {
  155. return (c >= '0' && c <= '9') || c == '+' || c == '-' || c == 'e' || c == 'E' || c == '.'
  156. }
  157. //go:nocheckptr
  158. func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
  159. sp := uintptr(rt.IndexChar(src, pos))
  160. ss := uintptr(sp)
  161. se := uintptr(rt.IndexChar(src, len(src)))
  162. if uintptr(sp) >= se {
  163. return -int(types.ERR_EOF), 0, nil
  164. }
  165. if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
  166. sp += 1
  167. }
  168. if sp == se {
  169. return -int(types.ERR_EOF), 0, nil
  170. }
  171. for ; sp < se; sp += uintptr(1) {
  172. if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
  173. break
  174. }
  175. }
  176. var vv string
  177. ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  178. (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
  179. (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
  180. v, err = strconv.ParseFloat(vv, 64)
  181. if err != nil {
  182. //NOTICE: allow overflow here
  183. if err.(*strconv.NumError).Err == strconv.ErrRange {
  184. return ret, 0, err
  185. }
  186. return -int(types.ERR_INVALID_CHAR), 0, err
  187. }
  188. runtime.KeepAlive(src)
  189. return ret, v, nil
  190. }
  191. func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
  192. pos = skipBlank(src, pos)
  193. if pos < 0 {
  194. return pos, types.JsonState{Vt: types.ValueType(pos)}
  195. }
  196. switch c := src[pos]; c {
  197. case 'n':
  198. ret = decodeNull(src, pos)
  199. if ret < 0 {
  200. return ret, types.JsonState{Vt: types.ValueType(ret)}
  201. }
  202. return ret, types.JsonState{Vt: types.V_NULL}
  203. case '"':
  204. var ep int
  205. ret, ep = skipString(src, pos)
  206. if ret < 0 {
  207. return ret, types.JsonState{Vt: types.ValueType(ret)}
  208. }
  209. return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
  210. case '{':
  211. return pos + 1, types.JsonState{Vt: types.V_OBJECT}
  212. case '[':
  213. return pos + 1, types.JsonState{Vt: types.V_ARRAY}
  214. case 't':
  215. ret = decodeTrue(src, pos)
  216. if ret < 0 {
  217. return ret, types.JsonState{Vt: types.ValueType(ret)}
  218. }
  219. return ret, types.JsonState{Vt: types.V_TRUE}
  220. case 'f':
  221. ret = decodeFalse(src, pos)
  222. if ret < 0 {
  223. return ret, types.JsonState{Vt: types.ValueType(ret)}
  224. }
  225. return ret, types.JsonState{Vt: types.V_FALSE}
  226. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  227. if skipnum {
  228. ret = skipNumber(src, pos)
  229. if ret >= 0 {
  230. return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
  231. } else {
  232. return ret, types.JsonState{Vt: types.ValueType(ret)}
  233. }
  234. } else {
  235. var iv int64
  236. ret, iv, _ = decodeInt64(src, pos)
  237. if ret >= 0 {
  238. return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
  239. } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
  240. return ret, types.JsonState{Vt: types.ValueType(ret)}
  241. }
  242. var fv float64
  243. ret, fv, _ = decodeFloat64(src, pos)
  244. if ret >= 0 {
  245. return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
  246. } else {
  247. return ret, types.JsonState{Vt: types.ValueType(ret)}
  248. }
  249. }
  250. default:
  251. return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
  252. }
  253. }
  254. //go:nocheckptr
  255. func skipNumber(src string, pos int) (ret int) {
  256. sp := uintptr(rt.IndexChar(src, pos))
  257. se := uintptr(rt.IndexChar(src, len(src)))
  258. if uintptr(sp) >= se {
  259. return -int(types.ERR_EOF)
  260. }
  261. if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
  262. sp += 1
  263. }
  264. ss := sp
  265. var pointer bool
  266. var exponent bool
  267. var lastIsDigit bool
  268. var nextNeedDigit = true
  269. for ; sp < se; sp += uintptr(1) {
  270. c := *(*byte)(unsafe.Pointer(sp))
  271. if isDigit(c) {
  272. lastIsDigit = true
  273. nextNeedDigit = false
  274. continue
  275. } else if nextNeedDigit {
  276. return -int(types.ERR_INVALID_CHAR)
  277. } else if c == '.' {
  278. if !lastIsDigit || pointer || exponent || sp == ss {
  279. return -int(types.ERR_INVALID_CHAR)
  280. }
  281. pointer = true
  282. lastIsDigit = false
  283. nextNeedDigit = true
  284. continue
  285. } else if c == 'e' || c == 'E' {
  286. if !lastIsDigit || exponent {
  287. return -int(types.ERR_INVALID_CHAR)
  288. }
  289. if sp == se-1 {
  290. return -int(types.ERR_EOF)
  291. }
  292. exponent = true
  293. lastIsDigit = false
  294. nextNeedDigit = false
  295. continue
  296. } else if c == '-' || c == '+' {
  297. if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
  298. return -int(types.ERR_INVALID_CHAR)
  299. }
  300. lastIsDigit = false
  301. nextNeedDigit = true
  302. continue
  303. } else {
  304. break
  305. }
  306. }
  307. if nextNeedDigit {
  308. return -int(types.ERR_EOF)
  309. }
  310. runtime.KeepAlive(src)
  311. return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  312. }
  313. //go:nocheckptr
  314. func skipString(src string, pos int) (ret int, ep int) {
  315. if pos+1 >= len(src) {
  316. return -int(types.ERR_EOF), -1
  317. }
  318. sp := uintptr(rt.IndexChar(src, pos))
  319. se := uintptr(rt.IndexChar(src, len(src)))
  320. // not start with quote
  321. if *(*byte)(unsafe.Pointer(sp)) != '"' {
  322. return -int(types.ERR_INVALID_CHAR), -1
  323. }
  324. sp += 1
  325. ep = -1
  326. for sp < se {
  327. c := *(*byte)(unsafe.Pointer(sp))
  328. if c == '\\' {
  329. if ep == -1 {
  330. ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  331. }
  332. sp += 2
  333. continue
  334. }
  335. sp += 1
  336. if c == '"' {
  337. return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
  338. }
  339. }
  340. runtime.KeepAlive(src)
  341. // not found the closed quote until EOF
  342. return -int(types.ERR_EOF), -1
  343. }
  344. //go:nocheckptr
  345. func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
  346. if pos+1 >= len(src) {
  347. return -int(types.ERR_EOF)
  348. }
  349. sp := uintptr(rt.IndexChar(src, pos))
  350. se := uintptr(rt.IndexChar(src, len(src)))
  351. if *(*byte)(unsafe.Pointer(sp)) != lchar {
  352. return -int(types.ERR_INVALID_CHAR)
  353. }
  354. sp += 1
  355. nbrace := 1
  356. inquote := false
  357. for sp < se {
  358. c := *(*byte)(unsafe.Pointer(sp))
  359. if c == '\\' {
  360. sp += 2
  361. continue
  362. } else if c == '"' {
  363. inquote = !inquote
  364. } else if c == lchar {
  365. if !inquote {
  366. nbrace += 1
  367. }
  368. } else if c == rchar {
  369. if !inquote {
  370. nbrace -= 1
  371. if nbrace == 0 {
  372. sp += 1
  373. break
  374. }
  375. }
  376. }
  377. sp += 1
  378. }
  379. if nbrace != 0 {
  380. return -int(types.ERR_INVALID_CHAR)
  381. }
  382. runtime.KeepAlive(src)
  383. return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
  384. }
  385. func skipValueFast(src string, pos int) (ret int, start int) {
  386. pos = skipBlank(src, pos)
  387. if pos < 0 {
  388. return pos, -1
  389. }
  390. switch c := src[pos]; c {
  391. case 'n':
  392. ret = decodeNull(src, pos)
  393. case '"':
  394. ret, _ = skipString(src, pos)
  395. case '{':
  396. ret = skipPair(src, pos, '{', '}')
  397. case '[':
  398. ret = skipPair(src, pos, '[', ']')
  399. case 't':
  400. ret = decodeTrue(src, pos)
  401. case 'f':
  402. ret = decodeFalse(src, pos)
  403. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  404. ret = skipNumber(src, pos)
  405. default:
  406. ret = -int(types.ERR_INVALID_CHAR)
  407. }
  408. return ret, pos
  409. }
  410. func skipValue(src string, pos int) (ret int, start int) {
  411. pos = skipBlank(src, pos)
  412. if pos < 0 {
  413. return pos, -1
  414. }
  415. switch c := src[pos]; c {
  416. case 'n':
  417. ret = decodeNull(src, pos)
  418. case '"':
  419. ret, _ = skipString(src, pos)
  420. case '{':
  421. ret, _ = skipObject(src, pos)
  422. case '[':
  423. ret, _ = skipArray(src, pos)
  424. case 't':
  425. ret = decodeTrue(src, pos)
  426. case 'f':
  427. ret = decodeFalse(src, pos)
  428. case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  429. ret = skipNumber(src, pos)
  430. default:
  431. ret = -int(types.ERR_INVALID_CHAR)
  432. }
  433. return ret, pos
  434. }
  435. func skipObject(src string, pos int) (ret int, start int) {
  436. start = skipBlank(src, pos)
  437. if start < 0 {
  438. return start, -1
  439. }
  440. if src[start] != '{' {
  441. return -int(types.ERR_INVALID_CHAR), -1
  442. }
  443. pos = start + 1
  444. pos = skipBlank(src, pos)
  445. if pos < 0 {
  446. return pos, -1
  447. }
  448. if src[pos] == '}' {
  449. return pos + 1, start
  450. }
  451. for {
  452. pos, _ = skipString(src, pos)
  453. if pos < 0 {
  454. return pos, -1
  455. }
  456. pos = skipBlank(src, pos)
  457. if pos < 0 {
  458. return pos, -1
  459. }
  460. if src[pos] != ':' {
  461. return -int(types.ERR_INVALID_CHAR), -1
  462. }
  463. pos++
  464. pos, _ = skipValue(src, pos)
  465. if pos < 0 {
  466. return pos, -1
  467. }
  468. pos = skipBlank(src, pos)
  469. if pos < 0 {
  470. return pos, -1
  471. }
  472. if src[pos] == '}' {
  473. return pos + 1, start
  474. }
  475. if src[pos] != ',' {
  476. return -int(types.ERR_INVALID_CHAR), -1
  477. }
  478. pos++
  479. pos = skipBlank(src, pos)
  480. if pos < 0 {
  481. return pos, -1
  482. }
  483. }
  484. }
  485. func skipArray(src string, pos int) (ret int, start int) {
  486. start = skipBlank(src, pos)
  487. if start < 0 {
  488. return start, -1
  489. }
  490. if src[start] != '[' {
  491. return -int(types.ERR_INVALID_CHAR), -1
  492. }
  493. pos = start + 1
  494. pos = skipBlank(src, pos)
  495. if pos < 0 {
  496. return pos, -1
  497. }
  498. if src[pos] == ']' {
  499. return pos + 1, start
  500. }
  501. for {
  502. pos, _ = skipValue(src, pos)
  503. if pos < 0 {
  504. return pos, -1
  505. }
  506. pos = skipBlank(src, pos)
  507. if pos < 0 {
  508. return pos, -1
  509. }
  510. if src[pos] == ']' {
  511. return pos + 1, start
  512. }
  513. if src[pos] != ',' {
  514. return -int(types.ERR_INVALID_CHAR), -1
  515. }
  516. pos++
  517. }
  518. }