123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341 |
- /*
- * Copyright 2021 ByteDance Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package encoder
- import (
- `bytes`
- `encoding/json`
- `reflect`
- `runtime`
- `unsafe`
- `github.com/bytedance/sonic/internal/native`
- `github.com/bytedance/sonic/internal/native/types`
- `github.com/bytedance/sonic/internal/rt`
- `github.com/bytedance/sonic/utf8`
- `github.com/bytedance/sonic/option`
- )
// Options is a bit set of encoding options; individual flags are combined
// with the bitwise OR operator.
type Options uint64

// Bit indexes of the individual option flags inside an Options value.
const (
	bitSortMapKeys = iota
	bitEscapeHTML
	bitCompactMarshaler
	bitNoQuoteTextMarshaler
	bitNoNullSliceOrMap
	bitValidateString
	bitNoValidateJSONMarshaler

	// used for recursive compile
	bitPointerValue = 63
)

const (
	// SortMapKeys indicates that the keys of a map need to be sorted
	// before serializing into JSON.
	// WARNING: This hurts performance A LOT, USE WITH CARE.
	SortMapKeys = Options(1) << bitSortMapKeys

	// EscapeHTML tells the encoder to escape all HTML characters
	// after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
	// WARNING: This hurts performance A LOT, USE WITH CARE.
	EscapeHTML = Options(1) << bitEscapeHTML

	// CompactMarshaler indicates that the output JSON from json.Marshaler
	// is always compact and needs no validation.
	CompactMarshaler = Options(1) << bitCompactMarshaler

	// NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
	// is always an escaped string and needs no quoting.
	NoQuoteTextMarshaler = Options(1) << bitNoQuoteTextMarshaler

	// NoNullSliceOrMap indicates that all empty Arrays or Objects are encoded as
	// '[]' or '{}' instead of 'null'.
	NoNullSliceOrMap = Options(1) << bitNoNullSliceOrMap

	// ValidateString indicates that the encoder should validate the input string
	// before encoding it into JSON.
	ValidateString = Options(1) << bitValidateString

	// NoValidateJSONMarshaler indicates that the encoder should not validate the
	// output string after encoding the JSONMarshaler to JSON.
	NoValidateJSONMarshaler = Options(1) << bitNoValidateJSONMarshaler

	// CompatibleWithStd is the flag combination used to be compatible with the
	// std encoder.
	CompatibleWithStd = SortMapKeys | EscapeHTML | CompactMarshaler
)
- // Encoder represents a specific set of encoder configurations.
- type Encoder struct {
- Opts Options
- prefix string
- indent string
- }
- // Encode returns the JSON encoding of v.
- func (self *Encoder) Encode(v interface{}) ([]byte, error) {
- if self.indent != "" || self.prefix != "" {
- return EncodeIndented(v, self.prefix, self.indent, self.Opts)
- }
- return Encode(v, self.Opts)
- }
- // SortKeys enables the SortMapKeys option.
- func (self *Encoder) SortKeys() *Encoder {
- self.Opts |= SortMapKeys
- return self
- }
- // SetEscapeHTML specifies if option EscapeHTML opens
- func (self *Encoder) SetEscapeHTML(f bool) {
- if f {
- self.Opts |= EscapeHTML
- } else {
- self.Opts &= ^EscapeHTML
- }
- }
- // SetValidateString specifies if option ValidateString opens
- func (self *Encoder) SetValidateString(f bool) {
- if f {
- self.Opts |= ValidateString
- } else {
- self.Opts &= ^ValidateString
- }
- }
- // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
- func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
- if f {
- self.Opts |= NoValidateJSONMarshaler
- } else {
- self.Opts &= ^NoValidateJSONMarshaler
- }
- }
- // SetCompactMarshaler specifies if option CompactMarshaler opens
- func (self *Encoder) SetCompactMarshaler(f bool) {
- if f {
- self.Opts |= CompactMarshaler
- } else {
- self.Opts &= ^CompactMarshaler
- }
- }
- // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
- func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
- if f {
- self.Opts |= NoQuoteTextMarshaler
- } else {
- self.Opts &= ^NoQuoteTextMarshaler
- }
- }
- // SetIndent instructs the encoder to format each subsequent encoded
- // value as if indented by the package-level function EncodeIndent().
- // Calling SetIndent("", "") disables indentation.
- func (enc *Encoder) SetIndent(prefix, indent string) {
- enc.prefix = prefix
- enc.indent = indent
- }
- // Quote returns the JSON-quoted version of s.
- func Quote(s string) string {
- var n int
- var p []byte
- /* check for empty string */
- if s == "" {
- return `""`
- }
- /* allocate space for result */
- n = len(s) + 2
- p = make([]byte, 0, n)
- /* call the encoder */
- _ = encodeString(&p, s)
- return rt.Mem2Str(p)
- }
- // Encode returns the JSON encoding of val, encoded with opts.
- func Encode(val interface{}, opts Options) ([]byte, error) {
- var ret []byte
- buf := newBytes()
- err := encodeInto(&buf, val, opts)
- /* check for errors */
- if err != nil {
- freeBytes(buf)
- return nil, err
- }
- /* htmlescape or correct UTF-8 if opts enable */
- old := buf
- buf = encodeFinish(old, opts)
- pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr
- pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr
- /* return when allocated a new buffer */
- if pbuf != pold {
- freeBytes(old)
- return buf, nil
- }
- /* make a copy of the result */
- ret = make([]byte, len(buf))
- copy(ret, buf)
- freeBytes(buf)
- /* return the buffer into pool */
- return ret, nil
- }
- // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
- // a new one.
- func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
- err := encodeInto(buf, val, opts)
- if err != nil {
- return err
- }
- *buf = encodeFinish(*buf, opts)
- return err
- }
- func encodeInto(buf *[]byte, val interface{}, opts Options) error {
- stk := newStack()
- efv := rt.UnpackEface(val)
- err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
- /* return the stack into pool */
- if err != nil {
- resetStack(stk)
- }
- freeStack(stk)
- /* avoid GC ahead */
- runtime.KeepAlive(buf)
- runtime.KeepAlive(efv)
- return err
- }
- func encodeFinish(buf []byte, opts Options) []byte {
- if opts & EscapeHTML != 0 {
- buf = HTMLEscape(nil, buf)
- }
- if opts & ValidateString != 0 && !utf8.Validate(buf) {
- buf = utf8.CorrectWith(nil, buf, `\ufffd`)
- }
- return buf
- }
- var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
- // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
- // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
- // so that the JSON will be safe to embed inside HTML <script> tags.
- // For historical reasons, web browsers don't honor standard HTML
- // escaping within <script> tags, so an alternative JSON encoding must
- // be used.
- func HTMLEscape(dst []byte, src []byte) []byte {
- return htmlEscape(dst, src)
- }
- // EncodeIndented is like Encode but applies Indent to format the output.
- // Each JSON element in the output will begin on a new line beginning with prefix
- // followed by one or more copies of indent according to the indentation nesting.
- func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
- var err error
- var out []byte
- var buf *bytes.Buffer
- /* encode into the buffer */
- out = newBytes()
- err = EncodeInto(&out, val, opts)
- /* check for errors */
- if err != nil {
- freeBytes(out)
- return nil, err
- }
- /* indent the JSON */
- buf = newBuffer()
- err = json.Indent(buf, out, prefix, indent)
- /* check for errors */
- if err != nil {
- freeBytes(out)
- freeBuffer(buf)
- return nil, err
- }
- /* copy to the result buffer */
- ret := make([]byte, buf.Len())
- copy(ret, buf.Bytes())
- /* return the buffers into pool */
- freeBytes(out)
- freeBuffer(buf)
- return ret, nil
- }
- // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
- // order to reduce the first-hit latency.
- //
- // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
- // a compile option to set the depth of recursive compile for the nested struct type.
- func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
- cfg := option.DefaultCompileOptions()
- for _, opt := range opts {
- opt(&cfg)
- }
- return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
- }
- // Valid validates json and returns first non-blank character position,
- // if it is only one valid json value.
- // Otherwise returns invalid character position using start.
- //
- // Note: it does not check for the invalid UTF-8 characters.
- func Valid(data []byte) (ok bool, start int) {
- n := len(data)
- if n == 0 {
- return false, -1
- }
- s := rt.Mem2Str(data)
- p := 0
- m := types.NewStateMachine()
- ret := native.ValidateOne(&s, &p, m)
- types.FreeStateMachine(m)
- if ret < 0 {
- return false, p-1
- }
- /* check for trailing spaces */
- for ;p < n; p++ {
- if (types.SPACE_MASK & (1 << data[p])) == 0 {
- return false, p
- }
- }
- return true, ret
- }
|