encoder.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. /*
  2. * Copyright 2021 ByteDance Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package encoder
  17. import (
  18. `bytes`
  19. `encoding/json`
  20. `reflect`
  21. `runtime`
  22. `unsafe`
  23. `github.com/bytedance/sonic/internal/native`
  24. `github.com/bytedance/sonic/internal/native/types`
  25. `github.com/bytedance/sonic/internal/rt`
  26. `github.com/bytedance/sonic/utf8`
  27. `github.com/bytedance/sonic/option`
  28. )
  // Options is a bit set of encoding flags. Flags are combined with '|',
  // as CompatibleWithStd does below.
  type Options uint64

  /* bit positions backing the exported Options flags */
  const (
  	bitSortMapKeys = iota
  	bitEscapeHTML
  	bitCompactMarshaler
  	bitNoQuoteTextMarshaler
  	bitNoNullSliceOrMap
  	bitValidateString
  	bitNoValidateJSONMarshaler

  	// used for recursive compile
  	bitPointerValue = 63
  )

  const (
  	// SortMapKeys makes the encoder sort the keys of a map before
  	// serializing it into JSON.
  	// WARNING: This hurts performance A LOT, USE WITH CARE.
  	SortMapKeys = Options(1) << bitSortMapKeys

  	// EscapeHTML makes the encoder escape all HTML characters after
  	// serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
  	// WARNING: This hurts performance A LOT, USE WITH CARE.
  	EscapeHTML = Options(1) << bitEscapeHTML

  	// CompactMarshaler declares that the JSON produced by a json.Marshaler
  	// is always compact and needs no validation.
  	CompactMarshaler = Options(1) << bitCompactMarshaler

  	// NoQuoteTextMarshaler declares that the text produced by an
  	// encoding.TextMarshaler is always an escaped string and needs no quoting.
  	NoQuoteTextMarshaler = Options(1) << bitNoQuoteTextMarshaler

  	// NoNullSliceOrMap makes every empty Array or Object encode as '[]' or '{}'
  	// instead of 'null'.
  	NoNullSliceOrMap = Options(1) << bitNoNullSliceOrMap

  	// ValidateString makes the encoder validate the input string before
  	// encoding it into JSON.
  	ValidateString = Options(1) << bitValidateString

  	// NoValidateJSONMarshaler makes the encoder skip validation of the output
  	// string after encoding a JSONMarshaler to JSON.
  	NoValidateJSONMarshaler = Options(1) << bitNoValidateJSONMarshaler

  	// CompatibleWithStd is the flag combination used to be compatible with
  	// the std encoder.
  	CompatibleWithStd = SortMapKeys | EscapeHTML | CompactMarshaler
  )
  69. // Encoder represents a specific set of encoder configurations.
  70. type Encoder struct {
  71. Opts Options
  72. prefix string
  73. indent string
  74. }
  75. // Encode returns the JSON encoding of v.
  76. func (self *Encoder) Encode(v interface{}) ([]byte, error) {
  77. if self.indent != "" || self.prefix != "" {
  78. return EncodeIndented(v, self.prefix, self.indent, self.Opts)
  79. }
  80. return Encode(v, self.Opts)
  81. }
  82. // SortKeys enables the SortMapKeys option.
  83. func (self *Encoder) SortKeys() *Encoder {
  84. self.Opts |= SortMapKeys
  85. return self
  86. }
  87. // SetEscapeHTML specifies if option EscapeHTML opens
  88. func (self *Encoder) SetEscapeHTML(f bool) {
  89. if f {
  90. self.Opts |= EscapeHTML
  91. } else {
  92. self.Opts &= ^EscapeHTML
  93. }
  94. }
  95. // SetValidateString specifies if option ValidateString opens
  96. func (self *Encoder) SetValidateString(f bool) {
  97. if f {
  98. self.Opts |= ValidateString
  99. } else {
  100. self.Opts &= ^ValidateString
  101. }
  102. }
  103. // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
  104. func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
  105. if f {
  106. self.Opts |= NoValidateJSONMarshaler
  107. } else {
  108. self.Opts &= ^NoValidateJSONMarshaler
  109. }
  110. }
  111. // SetCompactMarshaler specifies if option CompactMarshaler opens
  112. func (self *Encoder) SetCompactMarshaler(f bool) {
  113. if f {
  114. self.Opts |= CompactMarshaler
  115. } else {
  116. self.Opts &= ^CompactMarshaler
  117. }
  118. }
  119. // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
  120. func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
  121. if f {
  122. self.Opts |= NoQuoteTextMarshaler
  123. } else {
  124. self.Opts &= ^NoQuoteTextMarshaler
  125. }
  126. }
  127. // SetIndent instructs the encoder to format each subsequent encoded
  128. // value as if indented by the package-level function EncodeIndent().
  129. // Calling SetIndent("", "") disables indentation.
  130. func (enc *Encoder) SetIndent(prefix, indent string) {
  131. enc.prefix = prefix
  132. enc.indent = indent
  133. }
  134. // Quote returns the JSON-quoted version of s.
  135. func Quote(s string) string {
  136. var n int
  137. var p []byte
  138. /* check for empty string */
  139. if s == "" {
  140. return `""`
  141. }
  142. /* allocate space for result */
  143. n = len(s) + 2
  144. p = make([]byte, 0, n)
  145. /* call the encoder */
  146. _ = encodeString(&p, s)
  147. return rt.Mem2Str(p)
  148. }
  149. // Encode returns the JSON encoding of val, encoded with opts.
  150. func Encode(val interface{}, opts Options) ([]byte, error) {
  151. var ret []byte
  152. buf := newBytes()
  153. err := encodeInto(&buf, val, opts)
  154. /* check for errors */
  155. if err != nil {
  156. freeBytes(buf)
  157. return nil, err
  158. }
  159. /* htmlescape or correct UTF-8 if opts enable */
  160. old := buf
  161. buf = encodeFinish(old, opts)
  162. pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr
  163. pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr
  164. /* return when allocated a new buffer */
  165. if pbuf != pold {
  166. freeBytes(old)
  167. return buf, nil
  168. }
  169. /* make a copy of the result */
  170. ret = make([]byte, len(buf))
  171. copy(ret, buf)
  172. freeBytes(buf)
  173. /* return the buffer into pool */
  174. return ret, nil
  175. }
  176. // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
  177. // a new one.
  178. func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
  179. err := encodeInto(buf, val, opts)
  180. if err != nil {
  181. return err
  182. }
  183. *buf = encodeFinish(*buf, opts)
  184. return err
  185. }
  186. func encodeInto(buf *[]byte, val interface{}, opts Options) error {
  187. stk := newStack()
  188. efv := rt.UnpackEface(val)
  189. err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
  190. /* return the stack into pool */
  191. if err != nil {
  192. resetStack(stk)
  193. }
  194. freeStack(stk)
  195. /* avoid GC ahead */
  196. runtime.KeepAlive(buf)
  197. runtime.KeepAlive(efv)
  198. return err
  199. }
  200. func encodeFinish(buf []byte, opts Options) []byte {
  201. if opts & EscapeHTML != 0 {
  202. buf = HTMLEscape(nil, buf)
  203. }
  204. if opts & ValidateString != 0 && !utf8.Validate(buf) {
  205. buf = utf8.CorrectWith(nil, buf, `\ufffd`)
  206. }
  207. return buf
  208. }
  // typeByte holds the result of rt.UnpackType for the reflect.Type of byte.
  // NOTE(review): presumably the runtime type descriptor used to recognise
  // byte elements elsewhere in the package; no use is visible in this part
  // of the file, so confirm before relying on that.
  var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
  210. // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
  211. // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
  212. // so that the JSON will be safe to embed inside HTML <script> tags.
  213. // For historical reasons, web browsers don't honor standard HTML
  214. // escaping within <script> tags, so an alternative JSON encoding must
  215. // be used.
  216. func HTMLEscape(dst []byte, src []byte) []byte {
  217. return htmlEscape(dst, src)
  218. }
  219. // EncodeIndented is like Encode but applies Indent to format the output.
  220. // Each JSON element in the output will begin on a new line beginning with prefix
  221. // followed by one or more copies of indent according to the indentation nesting.
  222. func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
  223. var err error
  224. var out []byte
  225. var buf *bytes.Buffer
  226. /* encode into the buffer */
  227. out = newBytes()
  228. err = EncodeInto(&out, val, opts)
  229. /* check for errors */
  230. if err != nil {
  231. freeBytes(out)
  232. return nil, err
  233. }
  234. /* indent the JSON */
  235. buf = newBuffer()
  236. err = json.Indent(buf, out, prefix, indent)
  237. /* check for errors */
  238. if err != nil {
  239. freeBytes(out)
  240. freeBuffer(buf)
  241. return nil, err
  242. }
  243. /* copy to the result buffer */
  244. ret := make([]byte, buf.Len())
  245. copy(ret, buf.Bytes())
  246. /* return the buffers into pool */
  247. freeBytes(out)
  248. freeBuffer(buf)
  249. return ret, nil
  250. }
  251. // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
  252. // order to reduce the first-hit latency.
  253. //
  254. // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
  255. // a compile option to set the depth of recursive compile for the nested struct type.
  256. func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
  257. cfg := option.DefaultCompileOptions()
  258. for _, opt := range opts {
  259. opt(&cfg)
  260. }
  261. return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
  262. }
  263. // Valid validates json and returns first non-blank character position,
  264. // if it is only one valid json value.
  265. // Otherwise returns invalid character position using start.
  266. //
  267. // Note: it does not check for the invalid UTF-8 characters.
  268. func Valid(data []byte) (ok bool, start int) {
  269. n := len(data)
  270. if n == 0 {
  271. return false, -1
  272. }
  273. s := rt.Mem2Str(data)
  274. p := 0
  275. m := types.NewStateMachine()
  276. ret := native.ValidateOne(&s, &p, m)
  277. types.FreeStateMachine(m)
  278. if ret < 0 {
  279. return false, p-1
  280. }
  281. /* check for trailing spaces */
  282. for ;p < n; p++ {
  283. if (types.SPACE_MASK & (1 << data[p])) == 0 {
  284. return false, p
  285. }
  286. }
  287. return true, ret
  288. }