decoder.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package decoder
  17. import (
  18. `unsafe`
  19. `encoding/json`
  20. `reflect`
  21. `runtime`
  22. `github.com/bytedance/sonic/internal/native`
  23. `github.com/bytedance/sonic/internal/native/types`
  24. `github.com/bytedance/sonic/internal/rt`
  25. `github.com/bytedance/sonic/option`
  26. `github.com/bytedance/sonic/utf8`
  27. )
  28. const (
  29. _F_use_int64 = 0
  30. _F_disable_urc = 2
  31. _F_disable_unknown = 3
  32. _F_copy_string = 4
  33. _F_use_number = types.B_USE_NUMBER
  34. _F_validate_string = types.B_VALIDATE_STRING
  35. _F_allow_control = types.B_ALLOW_CONTROL
  36. )
  37. type Options uint64
  38. const (
  39. OptionUseInt64 Options = 1 << _F_use_int64
  40. OptionUseNumber Options = 1 << _F_use_number
  41. OptionUseUnicodeErrors Options = 1 << _F_disable_urc
  42. OptionDisableUnknown Options = 1 << _F_disable_unknown
  43. OptionCopyString Options = 1 << _F_copy_string
  44. OptionValidateString Options = 1 << _F_validate_string
  45. )
  46. func (self *Decoder) SetOptions(opts Options) {
  47. if (opts & OptionUseNumber != 0) && (opts & OptionUseInt64 != 0) {
  48. panic("can't set OptionUseInt64 and OptionUseNumber both!")
  49. }
  50. self.f = uint64(opts)
  51. }
// Decoder is the decoder context object. It holds the input string, the
// current decoding position within it, and the option flags.
type Decoder struct {
	// i is the current decoding position in s (returned by Pos).
	i int
	// f is the option flag set, one bit per _F_* position.
	f uint64
	// s is the JSON input being decoded.
	s string
}
  58. // NewDecoder creates a new decoder instance.
  59. func NewDecoder(s string) *Decoder {
  60. return &Decoder{s: s}
  61. }
  62. // Pos returns the current decoding position.
  63. func (self *Decoder) Pos() int {
  64. return self.i
  65. }
  66. func (self *Decoder) Reset(s string) {
  67. self.s = s
  68. self.i = 0
  69. // self.f = 0
  70. }
  71. func (self *Decoder) CheckTrailings() error {
  72. pos := self.i
  73. buf := self.s
  74. /* skip all the trailing spaces */
  75. if pos != len(buf) {
  76. for pos < len(buf) && (types.SPACE_MASK & (1 << buf[pos])) != 0 {
  77. pos++
  78. }
  79. }
  80. /* then it must be at EOF */
  81. if pos == len(buf) {
  82. return nil
  83. }
  84. /* junk after JSON value */
  85. return SyntaxError {
  86. Src : buf,
  87. Pos : pos,
  88. Code : types.ERR_INVALID_CHAR,
  89. }
  90. }
  91. // Decode parses the JSON-encoded data from current position and stores the result
  92. // in the value pointed to by val.
  93. func (self *Decoder) Decode(val interface{}) error {
  94. /* validate json if needed */
  95. if (self.f & (1 << _F_validate_string)) != 0 && !utf8.ValidateString(self.s){
  96. dbuf := utf8.CorrectWith(nil, rt.Str2Mem(self.s), "\ufffd")
  97. self.s = rt.Mem2Str(dbuf)
  98. }
  99. vv := rt.UnpackEface(val)
  100. vp := vv.Value
  101. /* check for nil type */
  102. if vv.Type == nil {
  103. return &json.InvalidUnmarshalError{}
  104. }
  105. /* must be a non-nil pointer */
  106. if vp == nil || vv.Type.Kind() != reflect.Ptr {
  107. return &json.InvalidUnmarshalError{Type: vv.Type.Pack()}
  108. }
  109. etp := rt.PtrElem(vv.Type)
  110. /* check the defined pointer type for issue 379 */
  111. if vv.Type.IsNamed() {
  112. newp := vp
  113. etp = vv.Type
  114. vp = unsafe.Pointer(&newp)
  115. }
  116. /* create a new stack, and call the decoder */
  117. sb := newStack()
  118. nb, err := decodeTypedPointer(self.s, self.i, etp, vp, sb, self.f)
  119. /* return the stack back */
  120. self.i = nb
  121. freeStack(sb)
  122. /* avoid GC ahead */
  123. runtime.KeepAlive(vv)
  124. return err
  125. }
  126. // UseInt64 indicates the Decoder to unmarshal an integer into an interface{} as an
  127. // int64 instead of as a float64.
  128. func (self *Decoder) UseInt64() {
  129. self.f |= 1 << _F_use_int64
  130. self.f &^= 1 << _F_use_number
  131. }
  132. // UseNumber indicates the Decoder to unmarshal a number into an interface{} as a
  133. // json.Number instead of as a float64.
  134. func (self *Decoder) UseNumber() {
  135. self.f &^= 1 << _F_use_int64
  136. self.f |= 1 << _F_use_number
  137. }
  138. // UseUnicodeErrors indicates the Decoder to return an error when encounter invalid
  139. // UTF-8 escape sequences.
  140. func (self *Decoder) UseUnicodeErrors() {
  141. self.f |= 1 << _F_disable_urc
  142. }
  143. // DisallowUnknownFields indicates the Decoder to return an error when the destination
  144. // is a struct and the input contains object keys which do not match any
  145. // non-ignored, exported fields in the destination.
  146. func (self *Decoder) DisallowUnknownFields() {
  147. self.f |= 1 << _F_disable_unknown
  148. }
  149. // CopyString indicates the Decoder to decode string values by copying instead of referring.
  150. func (self *Decoder) CopyString() {
  151. self.f |= 1 << _F_copy_string
  152. }
  153. // ValidateString causes the Decoder to validate string values when decoding string value
  154. // in JSON. Validation is that, returning error when unescaped control chars(0x00-0x1f) or
  155. // invalid UTF-8 chars in the string value of JSON.
  156. func (self *Decoder) ValidateString() {
  157. self.f |= 1 << _F_validate_string
  158. }
  159. // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
  160. // order to reduce the first-hit latency.
  161. //
  162. // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
  163. // a compile option to set the depth of recursive compile for the nested struct type.
  164. func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
  165. cfg := option.DefaultCompileOptions()
  166. for _, opt := range opts {
  167. opt(&cfg)
  168. }
  169. return pretouchRec(map[reflect.Type]bool{vt:true}, cfg)
  170. }
  171. func pretouchType(_vt reflect.Type, opts option.CompileOptions) (map[reflect.Type]bool, error) {
  172. /* compile function */
  173. compiler := newCompiler().apply(opts)
  174. decoder := func(vt *rt.GoType, _ ...interface{}) (interface{}, error) {
  175. if pp, err := compiler.compile(_vt); err != nil {
  176. return nil, err
  177. } else {
  178. as := newAssembler(pp)
  179. as.name = _vt.String()
  180. return as.Load(), nil
  181. }
  182. }
  183. /* find or compile */
  184. vt := rt.UnpackType(_vt)
  185. if val := programCache.Get(vt); val != nil {
  186. return nil, nil
  187. } else if _, err := programCache.Compute(vt, decoder); err == nil {
  188. return compiler.rec, nil
  189. } else {
  190. return nil, err
  191. }
  192. }
  193. func pretouchRec(vtm map[reflect.Type]bool, opts option.CompileOptions) error {
  194. if opts.RecursiveDepth < 0 || len(vtm) == 0 {
  195. return nil
  196. }
  197. next := make(map[reflect.Type]bool)
  198. for vt := range(vtm) {
  199. sub, err := pretouchType(vt, opts)
  200. if err != nil {
  201. return err
  202. }
  203. for svt := range(sub) {
  204. next[svt] = true
  205. }
  206. }
  207. opts.RecursiveDepth -= 1
  208. return pretouchRec(next, opts)
  209. }
  210. // Skip skips only one json value, and returns first non-blank character position and its ending position if it is valid.
  211. // Otherwise, returns negative error code using start and invalid character position using end
  212. func Skip(data []byte) (start int, end int) {
  213. s := rt.Mem2Str(data)
  214. p := 0
  215. m := types.NewStateMachine()
  216. ret := native.SkipOne(&s, &p, m, uint64(0))
  217. types.FreeStateMachine(m)
  218. return ret, p
  219. }