123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585 |
- /*
- * Copyright 2022 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package ast
- import (
- `encoding/base64`
- `runtime`
- `strconv`
- `unsafe`
- `github.com/bytedance/sonic/internal/native/types`
- `github.com/bytedance/sonic/internal/rt`
- )
// _blankCharsMask is a bit set with one bit per JSON whitespace byte:
// space, horizontal tab, carriage return and line feed. The highest bit in
// use is bit 32 (' '), so the value needs more than 32 bits.
const _blankCharsMask = (1 << ' ') | (1 << '\t') | (1 << '\r') | (1 << '\n')

// Source spellings of the JSON literals and of the empty containers.
const (
	bytesNull   = "null"
	bytesTrue   = "true"
	bytesFalse  = "false"
	bytesObject = "{}"
	bytesArray  = "[]"
)

// isSpace reports whether c is one of the four JSON whitespace bytes.
//
// The membership test is done in uint64 rather than int so that the mask
// (which uses bit 32) also fits where int is only 32 bits wide. Go defines a
// shift by 64 or more as zero, so bytes >= 64 never match.
func isSpace(c byte) bool {
	return (uint64(1)<<c)&_blankCharsMask != 0
}
- //go:nocheckptr
- func skipBlank(src string, pos int) int {
- se := uintptr(rt.IndexChar(src, len(src)))
- sp := uintptr(rt.IndexChar(src, pos))
- for sp < se {
- if !isSpace(*(*byte)(unsafe.Pointer(sp))) {
- break
- }
- sp += 1
- }
- if sp >= se {
- return -int(types.ERR_EOF)
- }
- runtime.KeepAlive(src)
- return int(sp - uintptr(rt.IndexChar(src, 0)))
- }
- func decodeNull(src string, pos int) (ret int) {
- ret = pos + 4
- if ret > len(src) {
- return -int(types.ERR_EOF)
- }
- if src[pos:ret] == bytesNull {
- return ret
- } else {
- return -int(types.ERR_INVALID_CHAR)
- }
- }
- func decodeTrue(src string, pos int) (ret int) {
- ret = pos + 4
- if ret > len(src) {
- return -int(types.ERR_EOF)
- }
- if src[pos:ret] == bytesTrue {
- return ret
- } else {
- return -int(types.ERR_INVALID_CHAR)
- }
- }
- func decodeFalse(src string, pos int) (ret int) {
- ret = pos + 5
- if ret > len(src) {
- return -int(types.ERR_EOF)
- }
- if src[pos:ret] == bytesFalse {
- return ret
- }
- return -int(types.ERR_INVALID_CHAR)
- }
- //go:nocheckptr
- func decodeString(src string, pos int) (ret int, v string) {
- ret, ep := skipString(src, pos)
- if ep == -1 {
- (*rt.GoString)(unsafe.Pointer(&v)).Ptr = rt.IndexChar(src, pos+1)
- (*rt.GoString)(unsafe.Pointer(&v)).Len = ret - pos - 2
- return ret, v
- }
- vv, ok := unquoteBytes(rt.Str2Mem(src[pos:ret]))
- if !ok {
- return -int(types.ERR_INVALID_CHAR), ""
- }
- runtime.KeepAlive(src)
- return ret, rt.Mem2Str(vv)
- }
- func decodeBinary(src string, pos int) (ret int, v []byte) {
- var vv string
- ret, vv = decodeString(src, pos)
- if ret < 0 {
- return ret, nil
- }
- var err error
- v, err = base64.StdEncoding.DecodeString(vv)
- if err != nil {
- return -int(types.ERR_INVALID_CHAR), nil
- }
- return ret, v
- }
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Byte subtraction wraps, so anything below '0' becomes a large value
	// and fails the single comparison.
	return c-'0' <= 9
}
- //go:nocheckptr
- func decodeInt64(src string, pos int) (ret int, v int64, err error) {
- sp := uintptr(rt.IndexChar(src, pos))
- ss := uintptr(sp)
- se := uintptr(rt.IndexChar(src, len(src)))
- if uintptr(sp) >= se {
- return -int(types.ERR_EOF), 0, nil
- }
- if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
- sp += 1
- }
- if sp == se {
- return -int(types.ERR_EOF), 0, nil
- }
- for ; sp < se; sp += uintptr(1) {
- if !isDigit(*(*byte)(unsafe.Pointer(sp))) {
- break
- }
- }
- if sp < se {
- if c := *(*byte)(unsafe.Pointer(sp)); c == '.' || c == 'e' || c == 'E' {
- return -int(types.ERR_INVALID_NUMBER_FMT), 0, nil
- }
- }
- var vv string
- ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
- (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
- (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
- v, err = strconv.ParseInt(vv, 10, 64)
- if err != nil {
- //NOTICE: allow overflow here
- if err.(*strconv.NumError).Err == strconv.ErrRange {
- return ret, 0, err
- }
- return -int(types.ERR_INVALID_CHAR), 0, err
- }
- runtime.KeepAlive(src)
- return ret, v, nil
- }
// isNumberChars reports whether c may appear somewhere inside a JSON number
// token: a decimal digit, a sign, an exponent marker or the decimal point.
func isNumberChars(c byte) bool {
	switch c {
	case '+', '-', '.', 'e', 'E':
		return true
	default:
		return '0' <= c && c <= '9'
	}
}
- //go:nocheckptr
- func decodeFloat64(src string, pos int) (ret int, v float64, err error) {
- sp := uintptr(rt.IndexChar(src, pos))
- ss := uintptr(sp)
- se := uintptr(rt.IndexChar(src, len(src)))
- if uintptr(sp) >= se {
- return -int(types.ERR_EOF), 0, nil
- }
- if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
- sp += 1
- }
- if sp == se {
- return -int(types.ERR_EOF), 0, nil
- }
- for ; sp < se; sp += uintptr(1) {
- if !isNumberChars(*(*byte)(unsafe.Pointer(sp))) {
- break
- }
- }
- var vv string
- ret = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
- (*rt.GoString)(unsafe.Pointer(&vv)).Ptr = unsafe.Pointer(ss)
- (*rt.GoString)(unsafe.Pointer(&vv)).Len = ret - pos
- v, err = strconv.ParseFloat(vv, 64)
- if err != nil {
- //NOTICE: allow overflow here
- if err.(*strconv.NumError).Err == strconv.ErrRange {
- return ret, 0, err
- }
- return -int(types.ERR_INVALID_CHAR), 0, err
- }
- runtime.KeepAlive(src)
- return ret, v, nil
- }
- func decodeValue(src string, pos int, skipnum bool) (ret int, v types.JsonState) {
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, types.JsonState{Vt: types.ValueType(pos)}
- }
- switch c := src[pos]; c {
- case 'n':
- ret = decodeNull(src, pos)
- if ret < 0 {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- return ret, types.JsonState{Vt: types.V_NULL}
- case '"':
- var ep int
- ret, ep = skipString(src, pos)
- if ret < 0 {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- return ret, types.JsonState{Vt: types.V_STRING, Iv: int64(pos + 1), Ep: ep}
- case '{':
- return pos + 1, types.JsonState{Vt: types.V_OBJECT}
- case '[':
- return pos + 1, types.JsonState{Vt: types.V_ARRAY}
- case 't':
- ret = decodeTrue(src, pos)
- if ret < 0 {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- return ret, types.JsonState{Vt: types.V_TRUE}
- case 'f':
- ret = decodeFalse(src, pos)
- if ret < 0 {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- return ret, types.JsonState{Vt: types.V_FALSE}
- case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- if skipnum {
- ret = skipNumber(src, pos)
- if ret >= 0 {
- return ret, types.JsonState{Vt: types.V_DOUBLE, Iv: 0, Ep: pos}
- } else {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- } else {
- var iv int64
- ret, iv, _ = decodeInt64(src, pos)
- if ret >= 0 {
- return ret, types.JsonState{Vt: types.V_INTEGER, Iv: iv, Ep: pos}
- } else if ret != -int(types.ERR_INVALID_NUMBER_FMT) {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- var fv float64
- ret, fv, _ = decodeFloat64(src, pos)
- if ret >= 0 {
- return ret, types.JsonState{Vt: types.V_DOUBLE, Dv: fv, Ep: pos}
- } else {
- return ret, types.JsonState{Vt: types.ValueType(ret)}
- }
- }
-
- default:
- return -int(types.ERR_INVALID_CHAR), types.JsonState{Vt:-types.ValueType(types.ERR_INVALID_CHAR)}
- }
- }
- //go:nocheckptr
- func skipNumber(src string, pos int) (ret int) {
- sp := uintptr(rt.IndexChar(src, pos))
- se := uintptr(rt.IndexChar(src, len(src)))
- if uintptr(sp) >= se {
- return -int(types.ERR_EOF)
- }
- if c := *(*byte)(unsafe.Pointer(sp)); c == '-' {
- sp += 1
- }
- ss := sp
- var pointer bool
- var exponent bool
- var lastIsDigit bool
- var nextNeedDigit = true
- for ; sp < se; sp += uintptr(1) {
- c := *(*byte)(unsafe.Pointer(sp))
- if isDigit(c) {
- lastIsDigit = true
- nextNeedDigit = false
- continue
- } else if nextNeedDigit {
- return -int(types.ERR_INVALID_CHAR)
- } else if c == '.' {
- if !lastIsDigit || pointer || exponent || sp == ss {
- return -int(types.ERR_INVALID_CHAR)
- }
- pointer = true
- lastIsDigit = false
- nextNeedDigit = true
- continue
- } else if c == 'e' || c == 'E' {
- if !lastIsDigit || exponent {
- return -int(types.ERR_INVALID_CHAR)
- }
- if sp == se-1 {
- return -int(types.ERR_EOF)
- }
- exponent = true
- lastIsDigit = false
- nextNeedDigit = false
- continue
- } else if c == '-' || c == '+' {
- if prev := *(*byte)(unsafe.Pointer(sp - 1)); prev != 'e' && prev != 'E' {
- return -int(types.ERR_INVALID_CHAR)
- }
- lastIsDigit = false
- nextNeedDigit = true
- continue
- } else {
- break
- }
- }
- if nextNeedDigit {
- return -int(types.ERR_EOF)
- }
- runtime.KeepAlive(src)
- return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
- }
- //go:nocheckptr
- func skipString(src string, pos int) (ret int, ep int) {
- if pos+1 >= len(src) {
- return -int(types.ERR_EOF), -1
- }
- sp := uintptr(rt.IndexChar(src, pos))
- se := uintptr(rt.IndexChar(src, len(src)))
- // not start with quote
- if *(*byte)(unsafe.Pointer(sp)) != '"' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- sp += 1
- ep = -1
- for sp < se {
- c := *(*byte)(unsafe.Pointer(sp))
- if c == '\\' {
- if ep == -1 {
- ep = int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
- }
- sp += 2
- continue
- }
- sp += 1
- if c == '"' {
- return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr)), ep
- }
- }
- runtime.KeepAlive(src)
- // not found the closed quote until EOF
- return -int(types.ERR_EOF), -1
- }
- //go:nocheckptr
- func skipPair(src string, pos int, lchar byte, rchar byte) (ret int) {
- if pos+1 >= len(src) {
- return -int(types.ERR_EOF)
- }
- sp := uintptr(rt.IndexChar(src, pos))
- se := uintptr(rt.IndexChar(src, len(src)))
- if *(*byte)(unsafe.Pointer(sp)) != lchar {
- return -int(types.ERR_INVALID_CHAR)
- }
- sp += 1
- nbrace := 1
- inquote := false
- for sp < se {
- c := *(*byte)(unsafe.Pointer(sp))
- if c == '\\' {
- sp += 2
- continue
- } else if c == '"' {
- inquote = !inquote
- } else if c == lchar {
- if !inquote {
- nbrace += 1
- }
- } else if c == rchar {
- if !inquote {
- nbrace -= 1
- if nbrace == 0 {
- sp += 1
- break
- }
- }
- }
- sp += 1
- }
- if nbrace != 0 {
- return -int(types.ERR_INVALID_CHAR)
- }
- runtime.KeepAlive(src)
- return int(uintptr(sp) - uintptr((*rt.GoString)(unsafe.Pointer(&src)).Ptr))
- }
- func skipValueFast(src string, pos int) (ret int, start int) {
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- switch c := src[pos]; c {
- case 'n':
- ret = decodeNull(src, pos)
- case '"':
- ret, _ = skipString(src, pos)
- case '{':
- ret = skipPair(src, pos, '{', '}')
- case '[':
- ret = skipPair(src, pos, '[', ']')
- case 't':
- ret = decodeTrue(src, pos)
- case 'f':
- ret = decodeFalse(src, pos)
- case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- ret = skipNumber(src, pos)
- default:
- ret = -int(types.ERR_INVALID_CHAR)
- }
- return ret, pos
- }
- func skipValue(src string, pos int) (ret int, start int) {
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- switch c := src[pos]; c {
- case 'n':
- ret = decodeNull(src, pos)
- case '"':
- ret, _ = skipString(src, pos)
- case '{':
- ret, _ = skipObject(src, pos)
- case '[':
- ret, _ = skipArray(src, pos)
- case 't':
- ret = decodeTrue(src, pos)
- case 'f':
- ret = decodeFalse(src, pos)
- case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- ret = skipNumber(src, pos)
- default:
- ret = -int(types.ERR_INVALID_CHAR)
- }
- return ret, pos
- }
- func skipObject(src string, pos int) (ret int, start int) {
- start = skipBlank(src, pos)
- if start < 0 {
- return start, -1
- }
- if src[start] != '{' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- pos = start + 1
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- if src[pos] == '}' {
- return pos + 1, start
- }
- for {
- pos, _ = skipString(src, pos)
- if pos < 0 {
- return pos, -1
- }
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- if src[pos] != ':' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- pos++
- pos, _ = skipValue(src, pos)
- if pos < 0 {
- return pos, -1
- }
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- if src[pos] == '}' {
- return pos + 1, start
- }
- if src[pos] != ',' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- pos++
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- }
- }
- func skipArray(src string, pos int) (ret int, start int) {
- start = skipBlank(src, pos)
- if start < 0 {
- return start, -1
- }
- if src[start] != '[' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- pos = start + 1
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- if src[pos] == ']' {
- return pos + 1, start
- }
- for {
- pos, _ = skipValue(src, pos)
- if pos < 0 {
- return pos, -1
- }
- pos = skipBlank(src, pos)
- if pos < 0 {
- return pos, -1
- }
- if src[pos] == ']' {
- return pos + 1, start
- }
- if src[pos] != ',' {
- return -int(types.ERR_INVALID_CHAR), -1
- }
- pos++
- }
- }
|