reader.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
  2. // Use of this source code is governed by a MIT license found in the LICENSE file.
  3. package codec
  4. import (
  5. "bufio"
  6. "bytes"
  7. "io"
  8. "strings"
  9. )
  // decReader is the byte source used while decoding. Implementations may read
  // from an io.Reader, or directly off a byte slice with zero-copying.
  type decReader interface {
  	// readx returns the next n bytes. The result is either a view into the
  	// input []byte (when decoding from a []byte), the implementation's scratch
  	// buffer (when n is smaller than that buffer), or a newly created []byte.
  	readx(n uint) []byte
  	// readb fills the given slice from the source.
  	readb([]byte)

  	// readn1, readn2, readn3, readn4 and readn8 return exactly that many bytes.
  	readn1() byte
  	readn2() [2]byte
  	readn3() [3]byte
  	readn4() [4]byte
  	readn8() [8]byte

  	// numread reports the number of bytes read.
  	numread() uint

  	// skipWhitespace skips any whitespace characters and returns the first
  	// non-matching byte.
  	skipWhitespace() (token byte)

  	// jsonReadNum includes the last read byte as the first element of the
  	// returned slice, then continues over numeric characters until it sees a
  	// non-numeric character or EOF. A non-numeric character is unread.
  	jsonReadNum() []byte

  	// jsonReadAsisChars reads json plain characters (anything but " or \)
  	// and returns a slice terminated by a non-json asis character.
  	jsonReadAsisChars() []byte

  	// readUntil reads until it meets the 'stop' byte, which is excluded from
  	// the returned slice.
  	readUntil(stop byte) (out []byte)
  }
  43. // ------------------------------------------------
  // unreadByteStatus records where a byte scanner stands with respect to its
  // last byte. It goes from
  //
  //	undefined (when initialized) -- (read) --> canUnread -- (unread) --> canRead ...
  type unreadByteStatus uint8

  const (
  	// unreadByteUndefined is the zero value: nothing has been read yet.
  	unreadByteUndefined unreadByteStatus = iota
  	// unreadByteCanRead: the last byte was unread and is waiting to be read again.
  	unreadByteCanRead
  	// unreadByteCanUnread: a byte was read and may be unread.
  	unreadByteCanUnread
  )
  52. // const defBufReaderSize = 4096
  53. // --------------------
  // ioReaderByteScanner combines the io.Reader and io.ByteScanner interfaces,
  // i.e. it requires Read, ReadByte and UnreadByte.
  type ioReaderByteScanner interface {
  	io.Reader
  	io.ByteScanner
  }
  62. // ioReaderByteScannerT does a simple wrapper of a io.ByteScanner
  63. // over a io.Reader
  64. type ioReaderByteScannerT struct {
  65. r io.Reader
  66. l byte // last byte
  67. ls unreadByteStatus // last byte status
  68. _ [2]byte // padding
  69. b [4]byte // tiny buffer for reading single bytes
  70. }
  71. func (z *ioReaderByteScannerT) ReadByte() (c byte, err error) {
  72. if z.ls == unreadByteCanRead {
  73. z.ls = unreadByteCanUnread
  74. c = z.l
  75. } else {
  76. _, err = z.Read(z.b[:1])
  77. c = z.b[0]
  78. }
  79. return
  80. }
  81. func (z *ioReaderByteScannerT) UnreadByte() (err error) {
  82. switch z.ls {
  83. case unreadByteCanUnread:
  84. z.ls = unreadByteCanRead
  85. case unreadByteCanRead:
  86. err = errDecUnreadByteLastByteNotRead
  87. case unreadByteUndefined:
  88. err = errDecUnreadByteNothingToRead
  89. default:
  90. err = errDecUnreadByteUnknown
  91. }
  92. return
  93. }
  94. func (z *ioReaderByteScannerT) Read(p []byte) (n int, err error) {
  95. if len(p) == 0 {
  96. return
  97. }
  98. var firstByte bool
  99. if z.ls == unreadByteCanRead {
  100. z.ls = unreadByteCanUnread
  101. p[0] = z.l
  102. if len(p) == 1 {
  103. n = 1
  104. return
  105. }
  106. firstByte = true
  107. p = p[1:]
  108. }
  109. n, err = z.r.Read(p)
  110. if n > 0 {
  111. if err == io.EOF && n == len(p) {
  112. err = nil // read was successful, so postpone EOF (till next time)
  113. }
  114. z.l = p[n-1]
  115. z.ls = unreadByteCanUnread
  116. }
  117. if firstByte {
  118. n++
  119. }
  120. return
  121. }
  122. func (z *ioReaderByteScannerT) reset(r io.Reader) {
  123. z.r = r
  124. z.ls = unreadByteUndefined
  125. z.l = 0
  126. }
  127. // ioDecReader is a decReader that reads off an io.Reader.
  128. type ioDecReader struct {
  129. rr ioReaderByteScannerT // the reader passed in, wrapped into a reader+bytescanner
  130. n uint // num read
  131. blist *bytesFreelist
  132. bufr []byte // buffer for readTo/readUntil
  133. br ioReaderByteScanner // main reader used for Read|ReadByte|UnreadByte
  134. bb *bufio.Reader // created internally, and reused on reset if needed
  135. x [64 + 40]byte // for: get struct field name, swallow valueTypeBytes, etc
  136. }
  137. func (z *ioDecReader) reset(r io.Reader, bufsize int, blist *bytesFreelist) {
  138. z.blist = blist
  139. z.n = 0
  140. z.bufr = z.blist.check(z.bufr, 256)
  141. z.br = nil
  142. var ok bool
  143. if bufsize <= 0 {
  144. z.br, ok = r.(ioReaderByteScanner)
  145. if !ok {
  146. z.rr.reset(r)
  147. z.br = &z.rr
  148. }
  149. return
  150. }
  151. // bufsize > 0 ...
  152. // if bytes.[Buffer|Reader], no value in adding extra buffer
  153. // if bufio.Reader, no value in extra buffer unless size changes
  154. switch bb := r.(type) {
  155. case *strings.Reader:
  156. z.br = bb
  157. case *bytes.Buffer:
  158. z.br = bb
  159. case *bytes.Reader:
  160. z.br = bb
  161. case *bufio.Reader:
  162. if bb.Size() == bufsize {
  163. z.br = bb
  164. }
  165. }
  166. if z.br == nil {
  167. if z.bb != nil && z.bb.Size() == bufsize {
  168. z.bb.Reset(r)
  169. } else {
  170. z.bb = bufio.NewReaderSize(r, bufsize)
  171. }
  172. z.br = z.bb
  173. }
  174. }
  175. func (z *ioDecReader) numread() uint {
  176. return z.n
  177. }
  178. func (z *ioDecReader) readn1() (b uint8) {
  179. b, err := z.br.ReadByte()
  180. halt.onerror(err)
  181. z.n++
  182. return
  183. }
  184. func (z *ioDecReader) readn2() (bs [2]byte) {
  185. z.readb(bs[:])
  186. return
  187. }
  188. func (z *ioDecReader) readn3() (bs [3]byte) {
  189. z.readb(bs[:])
  190. return
  191. }
  192. func (z *ioDecReader) readn4() (bs [4]byte) {
  193. z.readb(bs[:])
  194. return
  195. }
  196. func (z *ioDecReader) readn8() (bs [8]byte) {
  197. z.readb(bs[:])
  198. return
  199. }
  200. func (z *ioDecReader) readx(n uint) (bs []byte) {
  201. if n == 0 {
  202. return zeroByteSlice
  203. }
  204. if n < uint(len(z.x)) {
  205. bs = z.x[:n]
  206. } else {
  207. bs = make([]byte, n)
  208. }
  209. nn, err := readFull(z.br, bs)
  210. z.n += nn
  211. halt.onerror(err)
  212. return
  213. }
  214. func (z *ioDecReader) readb(bs []byte) {
  215. if len(bs) == 0 {
  216. return
  217. }
  218. nn, err := readFull(z.br, bs)
  219. z.n += nn
  220. halt.onerror(err)
  221. }
  222. // func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
  223. // b, err := z.br.ReadByte()
  224. // if err == nil {
  225. // z.n++
  226. // } else if err == io.EOF {
  227. // eof = true
  228. // } else {
  229. // halt.onerror(err)
  230. // }
  231. // return
  232. // }
  233. func (z *ioDecReader) jsonReadNum() (bs []byte) {
  234. z.unreadn1()
  235. z.bufr = z.bufr[:0]
  236. LOOP:
  237. // i, eof := z.readn1eof()
  238. i, err := z.br.ReadByte()
  239. if err == io.EOF {
  240. return z.bufr
  241. }
  242. if err != nil {
  243. halt.onerror(err)
  244. }
  245. z.n++
  246. if isNumberChar(i) {
  247. z.bufr = append(z.bufr, i)
  248. goto LOOP
  249. }
  250. z.unreadn1()
  251. return z.bufr
  252. }
  253. func (z *ioDecReader) jsonReadAsisChars() (bs []byte) {
  254. z.bufr = z.bufr[:0]
  255. LOOP:
  256. i := z.readn1()
  257. z.bufr = append(z.bufr, i)
  258. if i == '"' || i == '\\' {
  259. return z.bufr
  260. }
  261. goto LOOP
  262. }
  263. func (z *ioDecReader) skipWhitespace() (token byte) {
  264. LOOP:
  265. token = z.readn1()
  266. if isWhitespaceChar(token) {
  267. goto LOOP
  268. }
  269. return
  270. }
  271. // func (z *ioDecReader) readUntil(stop byte) []byte {
  272. // z.bufr = z.bufr[:0]
  273. // LOOP:
  274. // token := z.readn1()
  275. // z.bufr = append(z.bufr, token)
  276. // if token == stop {
  277. // return z.bufr[:len(z.bufr)-1]
  278. // }
  279. // goto LOOP
  280. // }
  281. func (z *ioDecReader) readUntil(stop byte) []byte {
  282. z.bufr = z.bufr[:0]
  283. LOOP:
  284. token := z.readn1()
  285. if token == stop {
  286. return z.bufr
  287. }
  288. z.bufr = append(z.bufr, token)
  289. goto LOOP
  290. }
  291. func (z *ioDecReader) unreadn1() {
  292. err := z.br.UnreadByte()
  293. halt.onerror(err)
  294. z.n--
  295. }
  296. // ------------------------------------
  // bytesDecReader is the decReader implementation that reads off a byte slice
  // with zero copying.
  //
  // Indexing out of bounds is deliberately NOT converted into an io.EOF here.
  // The resulting panic is left to bubble up to the exported Encode/Decode
  // method, where it is recovered as an io.EOF
  // (see the panicValToErr(...) function in helper.go).
  //
  // Consequently, every method MUST defensively check bounds, either
  // explicitly or by way of an indexing/slicing bounds check.
  type bytesDecReader struct {
  	b []byte // the data being decoded
  	c uint   // cursor: index of the next unread byte in b
  }
  311. func (z *bytesDecReader) reset(in []byte) {
  312. z.b = in[:len(in):len(in)] // reslicing must not go past capacity
  313. z.c = 0
  314. }
  315. func (z *bytesDecReader) numread() uint {
  316. return z.c
  317. }
  318. // Note: slicing from a non-constant start position is more expensive,
  319. // as more computation is required to decipher the pointer start position.
  320. // However, we do it only once, and it's better than reslicing both z.b and return value.
  321. func (z *bytesDecReader) readx(n uint) (bs []byte) {
  322. // x := z.c + n
  323. // bs = z.b[z.c:x]
  324. // z.c = x
  325. bs = z.b[z.c : z.c+n]
  326. z.c += n
  327. return
  328. }
  329. func (z *bytesDecReader) readb(bs []byte) {
  330. copy(bs, z.readx(uint(len(bs))))
  331. }
  332. // MARKER: do not use this - as it calls into memmove (as the size of data to move is unknown)
  333. // func (z *bytesDecReader) readnn(bs []byte, n uint) {
  334. // x := z.c
  335. // copy(bs, z.b[x:x+n])
  336. // z.c += n
  337. // }
  338. // func (z *bytesDecReader) readn(num uint8) (bs [8]byte) {
  339. // x := z.c + uint(num)
  340. // copy(bs[:], z.b[z.c:x]) // slice z.b completely, so we get bounds error if past
  341. // z.c = x
  342. // return
  343. // }
  344. // func (z *bytesDecReader) readn1() uint8 {
  345. // z.c++
  346. // return z.b[z.c-1]
  347. // }
  348. // MARKER: readn{1,2,3,4,8} should throw an out of bounds error if past length.
  349. // MARKER: readn1: explicitly ensure bounds check is done
  350. // MARKER: readn{2,3,4,8}: ensure you slice z.b completely so we get bounds error if past end.
  // readn1 returns the byte at the cursor and advances the cursor by one.
  //
  // The byte is indexed before the cursor moves, so reading at or past the end
  // of the data panics with an index-out-of-range error and leaves the cursor
  // unchanged. decRd.readn1 carries a manual copy of this body; keep the two
  // in sync.
  351. func (z *bytesDecReader) readn1() (v uint8) {
  352. v = z.b[z.c]
  353. z.c++
  354. return
  355. }
  356. func (z *bytesDecReader) readn2() (bs [2]byte) {
  357. // copy(bs[:], z.b[z.c:z.c+2])
  358. // bs[1] = z.b[z.c+1]
  359. // bs[0] = z.b[z.c]
  360. bs = okBytes2(z.b[z.c : z.c+2])
  361. z.c += 2
  362. return
  363. }
  364. func (z *bytesDecReader) readn3() (bs [3]byte) {
  365. // copy(bs[1:], z.b[z.c:z.c+3])
  366. bs = okBytes3(z.b[z.c : z.c+3])
  367. z.c += 3
  368. return
  369. }
  370. func (z *bytesDecReader) readn4() (bs [4]byte) {
  371. // copy(bs[:], z.b[z.c:z.c+4])
  372. bs = okBytes4(z.b[z.c : z.c+4])
  373. z.c += 4
  374. return
  375. }
  376. func (z *bytesDecReader) readn8() (bs [8]byte) {
  377. // copy(bs[:], z.b[z.c:z.c+8])
  378. bs = okBytes8(z.b[z.c : z.c+8])
  379. z.c += 8
  380. return
  381. }
  382. func (z *bytesDecReader) jsonReadNum() []byte {
  383. z.c-- // unread
  384. i := z.c
  385. LOOP:
  386. // gracefully handle end of slice, as end of stream is meaningful here
  387. if i < uint(len(z.b)) && isNumberChar(z.b[i]) {
  388. i++
  389. goto LOOP
  390. }
  391. z.c, i = i, z.c
  392. // MARKER: 20230103: byteSliceOf here prevents inlining of jsonReadNum
  393. // return byteSliceOf(z.b, i, z.c)
  394. return z.b[i:z.c]
  395. }
  396. func (z *bytesDecReader) jsonReadAsisChars() []byte {
  397. i := z.c
  398. LOOP:
  399. token := z.b[i]
  400. i++
  401. if token == '"' || token == '\\' {
  402. z.c, i = i, z.c
  403. return byteSliceOf(z.b, i, z.c)
  404. // return z.b[i:z.c]
  405. }
  406. goto LOOP
  407. }
  408. func (z *bytesDecReader) skipWhitespace() (token byte) {
  409. i := z.c
  410. LOOP:
  411. token = z.b[i]
  412. if isWhitespaceChar(token) {
  413. i++
  414. goto LOOP
  415. }
  416. z.c = i + 1
  417. return
  418. }
  419. func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
  420. i := z.c
  421. LOOP:
  422. if z.b[i] == stop {
  423. out = byteSliceOf(z.b, z.c, i)
  424. // out = z.b[z.c:i]
  425. z.c = i + 1
  426. return
  427. }
  428. i++
  429. goto LOOP
  430. }
  431. // --------------
  432. type decRd struct {
  433. rb bytesDecReader
  434. ri *ioDecReader
  435. decReader
  436. bytes bool // is bytes reader
  437. // MARKER: these fields below should belong directly in Encoder.
  438. // we pack them here for space efficiency and cache-line optimization.
  439. mtr bool // is maptype a known type?
  440. str bool // is slicetype a known type?
  441. be bool // is binary encoding
  442. js bool // is json handle
  443. jsms bool // is json handle, and MapKeyAsString
  444. cbor bool // is cbor handle
  445. cbreak bool // is a check breaker
  446. }
  447. // From our benchmarking, we see that the following impact performance:
  448. //
  449. // - functions that are too big to inline
  450. // - interface calls (as no inlining can occur)
  451. //
  452. // decRd is designed to embed a decReader, and then re-implement some of the decReader
  453. // methods using a conditional branch.
  454. //
  455. // We only override the ones where the bytes version is inlined AND the wrapper method
  456. // (containing the bytes version alongside a conditional branch) is also inlined.
  457. //
  458. // We use ./run.sh -z to check.
  459. //
  460. // Right now, only numread and "carefully crafted" readn1 can be inlined.
  // numread reports the number of bytes read so far. The value comes from the
  // byte-slice reader when z.bytes is set, and from the io reader otherwise.
  // The concrete readers are called directly rather than through the embedded
  // decReader interface.
  461. func (z *decRd) numread() uint {
  462. if z.bytes {
  463. return z.rb.numread()
  464. }
  465. return z.ri.numread()
  466. }
  // readn1 returns the next byte from whichever concrete reader is active.
  //
  // In bytes mode the body of bytesDecReader.readn1 is duplicated here by hand.
  // The byte is indexed before the cursor is advanced, so reading at or past
  // the end of the data panics and leaves the cursor unchanged.
  467. func (z *decRd) readn1() (v uint8) {
  468. if z.bytes {
  469. // return z.rb.readn1()
  470. // MARKER: calling z.rb.readn1() prevents decRd.readn1 from being inlined.
  471. // copy code, to manually inline and explicitly return here.
  472. // Keep in sync with bytesDecReader.readn1
  473. v = z.rb.b[z.rb.c]
  474. z.rb.c++
  475. return
  476. }
  477. return z.ri.readn1()
  478. }
  479. // func (z *decRd) readn4() [4]byte {
  480. // if z.bytes {
  481. // return z.rb.readn4()
  482. // }
  483. // return z.ri.readn4()
  484. // }
  485. // func (z *decRd) readn3() [3]byte {
  486. // if z.bytes {
  487. // return z.rb.readn3()
  488. // }
  489. // return z.ri.readn3()
  490. // }
  491. // func (z *decRd) skipWhitespace() byte {
  492. // if z.bytes {
  493. // return z.rb.skipWhitespace()
  494. // }
  495. // return z.ri.skipWhitespace()
  496. // }
  // devNullReader is an empty source: every Read reports io.EOF without
  // producing any data, and Close always succeeds.
  type devNullReader struct{}

  // Read writes nothing into p and returns (0, io.EOF).
  func (devNullReader) Read(p []byte) (n int, err error) {
  	return 0, io.EOF
  }

  // Close is a no-op that returns nil.
  func (devNullReader) Close() (err error) {
  	return nil
  }
  // readFull reads from r until bs is full or a read fails, and returns the
  // number of bytes placed in bs along with the error that ended the loop.
  //
  // An io.EOF delivered together with data is dropped, so that EOF is left for
  // the next read. When the input ends before bs is full, the error is a plain
  // io.EOF: it is deliberately NOT turned into io.ErrUnexpectedEOF, as that
  // serves no purpose here.
  func readFull(r io.Reader, bs []byte) (n uint, err error) {
  	want := uint(len(bs))
  	for n < want && err == nil {
  		var got int
  		got, err = r.Read(bs[n:])
  		if got <= 0 {
  			continue
  		}
  		n += uint(got)
  		if err == io.EOF {
  			err = nil // leave EOF for next time
  		}
  	}
  	return n, err
  }
  516. var _ decReader = (*decRd)(nil)