// parser.go (26 KB)

  1. package unstable
  2. import (
  3. "bytes"
  4. "fmt"
  5. "unicode"
  6. "github.com/pelletier/go-toml/v2/internal/characters"
  7. "github.com/pelletier/go-toml/v2/internal/danger"
  8. )
  // ParserError reports a problem tied to a specific portion of the parsed
  // document.
  //
  // It cannot outlive the instance of Parser it refers to, and may cause panics
  // if the parser is reset.
  type ParserError struct {
  	// Highlight is the part of the input the error points at.
  	Highlight []byte
  	// Message is the human-readable description; it is what Error returns.
  	Message string
  	// Key is optional.
  	Key []string
  }
  18. // Error is the implementation of the error interface.
  19. func (e *ParserError) Error() string {
  20. return e.Message
  21. }
  22. // NewParserError is a convenience function to create a ParserError
  23. //
  24. // Warning: Highlight needs to be a subslice of Parser.data, so only slices
  25. // returned by Parser.Raw are valid candidates.
  26. func NewParserError(highlight []byte, format string, args ...interface{}) error {
  27. return &ParserError{
  28. Highlight: highlight,
  29. Message: fmt.Errorf(format, args...).Error(),
  30. }
  31. }
  32. // Parser scans over a TOML-encoded document and generates an iterative AST.
  33. //
  34. // To prime the Parser, first reset it with the contents of a TOML document.
  35. // Then, process all top-level expressions sequentially. See Example.
  36. //
  37. // Don't forget to check Error() after you're done parsing.
  38. //
  39. // Each top-level expression needs to be fully processed before calling
  40. // NextExpression() again. Otherwise, calls to various Node methods may panic if
  41. // the parser has moved on the next expression.
  42. //
  43. // For performance reasons, go-toml doesn't make a copy of the input bytes to
  44. // the parser. Make sure to copy all the bytes you need to outlive the slice
  45. // given to the parser.
  46. type Parser struct {
  47. data []byte
  48. builder builder
  49. ref reference
  50. left []byte
  51. err error
  52. first bool
  53. KeepComments bool
  54. }
  55. // Data returns the slice provided to the last call to Reset.
  56. func (p *Parser) Data() []byte {
  57. return p.data
  58. }
  59. // Range returns a range description that corresponds to a given slice of the
  60. // input. If the argument is not a subslice of the parser input, this function
  61. // panics.
  62. func (p *Parser) Range(b []byte) Range {
  63. return Range{
  64. Offset: uint32(danger.SubsliceOffset(p.data, b)),
  65. Length: uint32(len(b)),
  66. }
  67. }
  68. // Raw returns the slice corresponding to the bytes in the given range.
  69. func (p *Parser) Raw(raw Range) []byte {
  70. return p.data[raw.Offset : raw.Offset+raw.Length]
  71. }
  72. // Reset brings the parser to its initial state for a given input. It wipes an
  73. // reuses internal storage to reduce allocation.
  74. func (p *Parser) Reset(b []byte) {
  75. p.builder.Reset()
  76. p.ref = invalidReference
  77. p.data = b
  78. p.left = b
  79. p.err = nil
  80. p.first = true
  81. }
  82. // NextExpression parses the next top-level expression. If an expression was
  83. // successfully parsed, it returns true. If the parser is at the end of the
  84. // document or an error occurred, it returns false.
  85. //
  86. // Retrieve the parsed expression with Expression().
  87. func (p *Parser) NextExpression() bool {
  88. if len(p.left) == 0 || p.err != nil {
  89. return false
  90. }
  91. p.builder.Reset()
  92. p.ref = invalidReference
  93. for {
  94. if len(p.left) == 0 || p.err != nil {
  95. return false
  96. }
  97. if !p.first {
  98. p.left, p.err = p.parseNewline(p.left)
  99. }
  100. if len(p.left) == 0 || p.err != nil {
  101. return false
  102. }
  103. p.ref, p.left, p.err = p.parseExpression(p.left)
  104. if p.err != nil {
  105. return false
  106. }
  107. p.first = false
  108. if p.ref.Valid() {
  109. return true
  110. }
  111. }
  112. }
  113. // Expression returns a pointer to the node representing the last successfully
  114. // parsed expression.
  115. func (p *Parser) Expression() *Node {
  116. return p.builder.NodeAt(p.ref)
  117. }
  118. // Error returns any error that has occurred during parsing.
  119. func (p *Parser) Error() error {
  120. return p.err
  121. }
  // Position identifies a single location in the input.
  type Position struct {
  	// Offset is the number of bytes from the beginning of the input.
  	Offset int
  	// Line is the line number, starting at 1.
  	Line int
  	// Column is the column number, starting at 1.
  	Column int
  }
  131. // Shape describes the position of a range in the input.
  132. type Shape struct {
  133. Start Position
  134. End Position
  135. }
  136. func (p *Parser) position(b []byte) Position {
  137. offset := danger.SubsliceOffset(p.data, b)
  138. lead := p.data[:offset]
  139. return Position{
  140. Offset: offset,
  141. Line: bytes.Count(lead, []byte{'\n'}) + 1,
  142. Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
  143. }
  144. }
  145. // Shape returns the shape of the given range in the input. Will
  146. // panic if the range is not a subslice of the input.
  147. func (p *Parser) Shape(r Range) Shape {
  148. raw := p.Raw(r)
  149. return Shape{
  150. Start: p.position(raw),
  151. End: p.position(raw[r.Length:]),
  152. }
  153. }
  154. func (p *Parser) parseNewline(b []byte) ([]byte, error) {
  155. if b[0] == '\n' {
  156. return b[1:], nil
  157. }
  158. if b[0] == '\r' {
  159. _, rest, err := scanWindowsNewline(b)
  160. return rest, err
  161. }
  162. return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
  163. }
  164. func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
  165. ref := invalidReference
  166. data, rest, err := scanComment(b)
  167. if p.KeepComments && err == nil {
  168. ref = p.builder.Push(Node{
  169. Kind: Comment,
  170. Raw: p.Range(data),
  171. Data: data,
  172. })
  173. }
  174. return ref, rest, err
  175. }
  176. func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
  177. // expression = ws [ comment ]
  178. // expression =/ ws keyval ws [ comment ]
  179. // expression =/ ws table ws [ comment ]
  180. ref := invalidReference
  181. b = p.parseWhitespace(b)
  182. if len(b) == 0 {
  183. return ref, b, nil
  184. }
  185. if b[0] == '#' {
  186. ref, rest, err := p.parseComment(b)
  187. return ref, rest, err
  188. }
  189. if b[0] == '\n' || b[0] == '\r' {
  190. return ref, b, nil
  191. }
  192. var err error
  193. if b[0] == '[' {
  194. ref, b, err = p.parseTable(b)
  195. } else {
  196. ref, b, err = p.parseKeyval(b)
  197. }
  198. if err != nil {
  199. return ref, nil, err
  200. }
  201. b = p.parseWhitespace(b)
  202. if len(b) > 0 && b[0] == '#' {
  203. cref, rest, err := p.parseComment(b)
  204. if cref != invalidReference {
  205. p.builder.Chain(ref, cref)
  206. }
  207. return ref, rest, err
  208. }
  209. return ref, b, nil
  210. }
  211. func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
  212. // table = std-table / array-table
  213. if len(b) > 1 && b[1] == '[' {
  214. return p.parseArrayTable(b)
  215. }
  216. return p.parseStdTable(b)
  217. }
  218. func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
  219. // array-table = array-table-open key array-table-close
  220. // array-table-open = %x5B.5B ws ; [[ Double left square bracket
  221. // array-table-close = ws %x5D.5D ; ]] Double right square bracket
  222. ref := p.builder.Push(Node{
  223. Kind: ArrayTable,
  224. })
  225. b = b[2:]
  226. b = p.parseWhitespace(b)
  227. k, b, err := p.parseKey(b)
  228. if err != nil {
  229. return ref, nil, err
  230. }
  231. p.builder.AttachChild(ref, k)
  232. b = p.parseWhitespace(b)
  233. b, err = expect(']', b)
  234. if err != nil {
  235. return ref, nil, err
  236. }
  237. b, err = expect(']', b)
  238. return ref, b, err
  239. }
  240. func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
  241. // std-table = std-table-open key std-table-close
  242. // std-table-open = %x5B ws ; [ Left square bracket
  243. // std-table-close = ws %x5D ; ] Right square bracket
  244. ref := p.builder.Push(Node{
  245. Kind: Table,
  246. })
  247. b = b[1:]
  248. b = p.parseWhitespace(b)
  249. key, b, err := p.parseKey(b)
  250. if err != nil {
  251. return ref, nil, err
  252. }
  253. p.builder.AttachChild(ref, key)
  254. b = p.parseWhitespace(b)
  255. b, err = expect(']', b)
  256. return ref, b, err
  257. }
  258. func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
  259. // keyval = key keyval-sep val
  260. ref := p.builder.Push(Node{
  261. Kind: KeyValue,
  262. })
  263. key, b, err := p.parseKey(b)
  264. if err != nil {
  265. return invalidReference, nil, err
  266. }
  267. // keyval-sep = ws %x3D ws ; =
  268. b = p.parseWhitespace(b)
  269. if len(b) == 0 {
  270. return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
  271. }
  272. b, err = expect('=', b)
  273. if err != nil {
  274. return invalidReference, nil, err
  275. }
  276. b = p.parseWhitespace(b)
  277. valRef, b, err := p.parseVal(b)
  278. if err != nil {
  279. return ref, b, err
  280. }
  281. p.builder.Chain(valRef, key)
  282. p.builder.AttachChild(ref, valRef)
  283. return ref, b, err
  284. }
  285. //nolint:cyclop,funlen
  286. func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
  287. // val = string / boolean / array / inline-table / date-time / float / integer
  288. ref := invalidReference
  289. if len(b) == 0 {
  290. return ref, nil, NewParserError(b, "expected value, not eof")
  291. }
  292. var err error
  293. c := b[0]
  294. switch c {
  295. case '"':
  296. var raw []byte
  297. var v []byte
  298. if scanFollowsMultilineBasicStringDelimiter(b) {
  299. raw, v, b, err = p.parseMultilineBasicString(b)
  300. } else {
  301. raw, v, b, err = p.parseBasicString(b)
  302. }
  303. if err == nil {
  304. ref = p.builder.Push(Node{
  305. Kind: String,
  306. Raw: p.Range(raw),
  307. Data: v,
  308. })
  309. }
  310. return ref, b, err
  311. case '\'':
  312. var raw []byte
  313. var v []byte
  314. if scanFollowsMultilineLiteralStringDelimiter(b) {
  315. raw, v, b, err = p.parseMultilineLiteralString(b)
  316. } else {
  317. raw, v, b, err = p.parseLiteralString(b)
  318. }
  319. if err == nil {
  320. ref = p.builder.Push(Node{
  321. Kind: String,
  322. Raw: p.Range(raw),
  323. Data: v,
  324. })
  325. }
  326. return ref, b, err
  327. case 't':
  328. if !scanFollowsTrue(b) {
  329. return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
  330. }
  331. ref = p.builder.Push(Node{
  332. Kind: Bool,
  333. Data: b[:4],
  334. })
  335. return ref, b[4:], nil
  336. case 'f':
  337. if !scanFollowsFalse(b) {
  338. return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
  339. }
  340. ref = p.builder.Push(Node{
  341. Kind: Bool,
  342. Data: b[:5],
  343. })
  344. return ref, b[5:], nil
  345. case '[':
  346. return p.parseValArray(b)
  347. case '{':
  348. return p.parseInlineTable(b)
  349. default:
  350. return p.parseIntOrFloatOrDateTime(b)
  351. }
  352. }
  // atmost returns the first n bytes of b, or all of b when it holds n bytes or
  // fewer.
  func atmost(b []byte, n int) []byte {
  	if len(b) > n {
  		return b[:n]
  	}

  	return b
  }
  359. func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
  360. v, rest, err := scanLiteralString(b)
  361. if err != nil {
  362. return nil, nil, nil, err
  363. }
  364. return v, v[1 : len(v)-1], rest, nil
  365. }
  366. func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
  367. // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
  368. // inline-table-open = %x7B ws ; {
  369. // inline-table-close = ws %x7D ; }
  370. // inline-table-sep = ws %x2C ws ; , Comma
  371. // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
  372. parent := p.builder.Push(Node{
  373. Kind: InlineTable,
  374. Raw: p.Range(b[:1]),
  375. })
  376. first := true
  377. var child reference
  378. b = b[1:]
  379. var err error
  380. for len(b) > 0 {
  381. previousB := b
  382. b = p.parseWhitespace(b)
  383. if len(b) == 0 {
  384. return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
  385. }
  386. if b[0] == '}' {
  387. break
  388. }
  389. if !first {
  390. b, err = expect(',', b)
  391. if err != nil {
  392. return parent, nil, err
  393. }
  394. b = p.parseWhitespace(b)
  395. }
  396. var kv reference
  397. kv, b, err = p.parseKeyval(b)
  398. if err != nil {
  399. return parent, nil, err
  400. }
  401. if first {
  402. p.builder.AttachChild(parent, kv)
  403. } else {
  404. p.builder.Chain(child, kv)
  405. }
  406. child = kv
  407. first = false
  408. }
  409. rest, err := expect('}', b)
  410. return parent, rest, err
  411. }
  412. //nolint:funlen,cyclop
  413. func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
  414. // array = array-open [ array-values ] ws-comment-newline array-close
  415. // array-open = %x5B ; [
  416. // array-close = %x5D ; ]
  417. // array-values = ws-comment-newline val ws-comment-newline array-sep array-values
  418. // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
  419. // array-sep = %x2C ; , Comma
  420. // ws-comment-newline = *( wschar / [ comment ] newline )
  421. arrayStart := b
  422. b = b[1:]
  423. parent := p.builder.Push(Node{
  424. Kind: Array,
  425. })
  426. // First indicates whether the parser is looking for the first element
  427. // (non-comment) of the array.
  428. first := true
  429. lastChild := invalidReference
  430. addChild := func(valueRef reference) {
  431. if lastChild == invalidReference {
  432. p.builder.AttachChild(parent, valueRef)
  433. } else {
  434. p.builder.Chain(lastChild, valueRef)
  435. }
  436. lastChild = valueRef
  437. }
  438. var err error
  439. for len(b) > 0 {
  440. cref := invalidReference
  441. cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
  442. if err != nil {
  443. return parent, nil, err
  444. }
  445. if cref != invalidReference {
  446. addChild(cref)
  447. }
  448. if len(b) == 0 {
  449. return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
  450. }
  451. if b[0] == ']' {
  452. break
  453. }
  454. if b[0] == ',' {
  455. if first {
  456. return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
  457. }
  458. b = b[1:]
  459. cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
  460. if err != nil {
  461. return parent, nil, err
  462. }
  463. if cref != invalidReference {
  464. addChild(cref)
  465. }
  466. } else if !first {
  467. return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
  468. }
  469. // TOML allows trailing commas in arrays.
  470. if len(b) > 0 && b[0] == ']' {
  471. break
  472. }
  473. var valueRef reference
  474. valueRef, b, err = p.parseVal(b)
  475. if err != nil {
  476. return parent, nil, err
  477. }
  478. addChild(valueRef)
  479. cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
  480. if err != nil {
  481. return parent, nil, err
  482. }
  483. if cref != invalidReference {
  484. addChild(cref)
  485. }
  486. first = false
  487. }
  488. rest, err := expect(']', b)
  489. return parent, rest, err
  490. }
  491. func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
  492. rootCommentRef := invalidReference
  493. latestCommentRef := invalidReference
  494. addComment := func(ref reference) {
  495. if rootCommentRef == invalidReference {
  496. rootCommentRef = ref
  497. } else if latestCommentRef == invalidReference {
  498. p.builder.AttachChild(rootCommentRef, ref)
  499. latestCommentRef = ref
  500. } else {
  501. p.builder.Chain(latestCommentRef, ref)
  502. latestCommentRef = ref
  503. }
  504. }
  505. for len(b) > 0 {
  506. var err error
  507. b = p.parseWhitespace(b)
  508. if len(b) > 0 && b[0] == '#' {
  509. var ref reference
  510. ref, b, err = p.parseComment(b)
  511. if err != nil {
  512. return invalidReference, nil, err
  513. }
  514. if ref != invalidReference {
  515. addComment(ref)
  516. }
  517. }
  518. if len(b) == 0 {
  519. break
  520. }
  521. if b[0] == '\n' || b[0] == '\r' {
  522. b, err = p.parseNewline(b)
  523. if err != nil {
  524. return invalidReference, nil, err
  525. }
  526. } else {
  527. break
  528. }
  529. }
  530. return rootCommentRef, b, nil
  531. }
  532. func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
  533. token, rest, err := scanMultilineLiteralString(b)
  534. if err != nil {
  535. return nil, nil, nil, err
  536. }
  537. i := 3
  538. // skip the immediate new line
  539. if token[i] == '\n' {
  540. i++
  541. } else if token[i] == '\r' && token[i+1] == '\n' {
  542. i += 2
  543. }
  544. return token, token[i : len(token)-3], rest, err
  545. }
  546. //nolint:funlen,gocognit,cyclop
  547. func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
  548. // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
  549. // ml-basic-string-delim
  550. // ml-basic-string-delim = 3quotation-mark
  551. // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
  552. //
  553. // mlb-content = mlb-char / newline / mlb-escaped-nl
  554. // mlb-char = mlb-unescaped / escaped
  555. // mlb-quotes = 1*2quotation-mark
  556. // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  557. // mlb-escaped-nl = escape ws newline *( wschar / newline )
  558. token, escaped, rest, err := scanMultilineBasicString(b)
  559. if err != nil {
  560. return nil, nil, nil, err
  561. }
  562. i := 3
  563. // skip the immediate new line
  564. if token[i] == '\n' {
  565. i++
  566. } else if token[i] == '\r' && token[i+1] == '\n' {
  567. i += 2
  568. }
  569. // fast path
  570. startIdx := i
  571. endIdx := len(token) - len(`"""`)
  572. if !escaped {
  573. str := token[startIdx:endIdx]
  574. verr := characters.Utf8TomlValidAlreadyEscaped(str)
  575. if verr.Zero() {
  576. return token, str, rest, nil
  577. }
  578. return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
  579. }
  580. var builder bytes.Buffer
  581. // The scanner ensures that the token starts and ends with quotes and that
  582. // escapes are balanced.
  583. for i < len(token)-3 {
  584. c := token[i]
  585. //nolint:nestif
  586. if c == '\\' {
  587. // When the last non-whitespace character on a line is an unescaped \,
  588. // it will be trimmed along with all whitespace (including newlines) up
  589. // to the next non-whitespace character or closing delimiter.
  590. isLastNonWhitespaceOnLine := false
  591. j := 1
  592. findEOLLoop:
  593. for ; j < len(token)-3-i; j++ {
  594. switch token[i+j] {
  595. case ' ', '\t':
  596. continue
  597. case '\r':
  598. if token[i+j+1] == '\n' {
  599. continue
  600. }
  601. case '\n':
  602. isLastNonWhitespaceOnLine = true
  603. }
  604. break findEOLLoop
  605. }
  606. if isLastNonWhitespaceOnLine {
  607. i += j
  608. for ; i < len(token)-3; i++ {
  609. c := token[i]
  610. if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
  611. i--
  612. break
  613. }
  614. }
  615. i++
  616. continue
  617. }
  618. // handle escaping
  619. i++
  620. c = token[i]
  621. switch c {
  622. case '"', '\\':
  623. builder.WriteByte(c)
  624. case 'b':
  625. builder.WriteByte('\b')
  626. case 'f':
  627. builder.WriteByte('\f')
  628. case 'n':
  629. builder.WriteByte('\n')
  630. case 'r':
  631. builder.WriteByte('\r')
  632. case 't':
  633. builder.WriteByte('\t')
  634. case 'e':
  635. builder.WriteByte(0x1B)
  636. case 'u':
  637. x, err := hexToRune(atmost(token[i+1:], 4), 4)
  638. if err != nil {
  639. return nil, nil, nil, err
  640. }
  641. builder.WriteRune(x)
  642. i += 4
  643. case 'U':
  644. x, err := hexToRune(atmost(token[i+1:], 8), 8)
  645. if err != nil {
  646. return nil, nil, nil, err
  647. }
  648. builder.WriteRune(x)
  649. i += 8
  650. default:
  651. return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
  652. }
  653. i++
  654. } else {
  655. size := characters.Utf8ValidNext(token[i:])
  656. if size == 0 {
  657. return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
  658. }
  659. builder.Write(token[i : i+size])
  660. i += size
  661. }
  662. }
  663. return token, builder.Bytes(), rest, nil
  664. }
  665. func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
  666. // key = simple-key / dotted-key
  667. // simple-key = quoted-key / unquoted-key
  668. //
  669. // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  670. // quoted-key = basic-string / literal-string
  671. // dotted-key = simple-key 1*( dot-sep simple-key )
  672. //
  673. // dot-sep = ws %x2E ws ; . Period
  674. raw, key, b, err := p.parseSimpleKey(b)
  675. if err != nil {
  676. return invalidReference, nil, err
  677. }
  678. ref := p.builder.Push(Node{
  679. Kind: Key,
  680. Raw: p.Range(raw),
  681. Data: key,
  682. })
  683. for {
  684. b = p.parseWhitespace(b)
  685. if len(b) > 0 && b[0] == '.' {
  686. b = p.parseWhitespace(b[1:])
  687. raw, key, b, err = p.parseSimpleKey(b)
  688. if err != nil {
  689. return ref, nil, err
  690. }
  691. p.builder.PushAndChain(Node{
  692. Kind: Key,
  693. Raw: p.Range(raw),
  694. Data: key,
  695. })
  696. } else {
  697. break
  698. }
  699. }
  700. return ref, b, nil
  701. }
  702. func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
  703. if len(b) == 0 {
  704. return nil, nil, nil, NewParserError(b, "expected key but found none")
  705. }
  706. // simple-key = quoted-key / unquoted-key
  707. // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  708. // quoted-key = basic-string / literal-string
  709. switch {
  710. case b[0] == '\'':
  711. return p.parseLiteralString(b)
  712. case b[0] == '"':
  713. return p.parseBasicString(b)
  714. case isUnquotedKeyChar(b[0]):
  715. key, rest = scanUnquotedKey(b)
  716. return key, key, rest, nil
  717. default:
  718. return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
  719. }
  720. }
  721. //nolint:funlen,cyclop
  722. func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
  723. // basic-string = quotation-mark *basic-char quotation-mark
  724. // quotation-mark = %x22 ; "
  725. // basic-char = basic-unescaped / escaped
  726. // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
  727. // escaped = escape escape-seq-char
  728. // escape-seq-char = %x22 ; " quotation mark U+0022
  729. // escape-seq-char =/ %x5C ; \ reverse solidus U+005C
  730. // escape-seq-char =/ %x62 ; b backspace U+0008
  731. // escape-seq-char =/ %x66 ; f form feed U+000C
  732. // escape-seq-char =/ %x6E ; n line feed U+000A
  733. // escape-seq-char =/ %x72 ; r carriage return U+000D
  734. // escape-seq-char =/ %x74 ; t tab U+0009
  735. // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
  736. // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
  737. token, escaped, rest, err := scanBasicString(b)
  738. if err != nil {
  739. return nil, nil, nil, err
  740. }
  741. startIdx := len(`"`)
  742. endIdx := len(token) - len(`"`)
  743. // Fast path. If there is no escape sequence, the string should just be
  744. // an UTF-8 encoded string, which is the same as Go. In that case,
  745. // validate the string and return a direct reference to the buffer.
  746. if !escaped {
  747. str := token[startIdx:endIdx]
  748. verr := characters.Utf8TomlValidAlreadyEscaped(str)
  749. if verr.Zero() {
  750. return token, str, rest, nil
  751. }
  752. return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
  753. }
  754. i := startIdx
  755. var builder bytes.Buffer
  756. // The scanner ensures that the token starts and ends with quotes and that
  757. // escapes are balanced.
  758. for i < len(token)-1 {
  759. c := token[i]
  760. if c == '\\' {
  761. i++
  762. c = token[i]
  763. switch c {
  764. case '"', '\\':
  765. builder.WriteByte(c)
  766. case 'b':
  767. builder.WriteByte('\b')
  768. case 'f':
  769. builder.WriteByte('\f')
  770. case 'n':
  771. builder.WriteByte('\n')
  772. case 'r':
  773. builder.WriteByte('\r')
  774. case 't':
  775. builder.WriteByte('\t')
  776. case 'e':
  777. builder.WriteByte(0x1B)
  778. case 'u':
  779. x, err := hexToRune(token[i+1:len(token)-1], 4)
  780. if err != nil {
  781. return nil, nil, nil, err
  782. }
  783. builder.WriteRune(x)
  784. i += 4
  785. case 'U':
  786. x, err := hexToRune(token[i+1:len(token)-1], 8)
  787. if err != nil {
  788. return nil, nil, nil, err
  789. }
  790. builder.WriteRune(x)
  791. i += 8
  792. default:
  793. return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
  794. }
  795. i++
  796. } else {
  797. size := characters.Utf8ValidNext(token[i:])
  798. if size == 0 {
  799. return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
  800. }
  801. builder.Write(token[i : i+size])
  802. i += size
  803. }
  804. }
  805. return token, builder.Bytes(), rest, nil
  806. }
  807. func hexToRune(b []byte, length int) (rune, error) {
  808. if len(b) < length {
  809. return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
  810. }
  811. b = b[:length]
  812. var r uint32
  813. for i, c := range b {
  814. d := uint32(0)
  815. switch {
  816. case '0' <= c && c <= '9':
  817. d = uint32(c - '0')
  818. case 'a' <= c && c <= 'f':
  819. d = uint32(c - 'a' + 10)
  820. case 'A' <= c && c <= 'F':
  821. d = uint32(c - 'A' + 10)
  822. default:
  823. return -1, NewParserError(b[i:i+1], "non-hex character")
  824. }
  825. r = r*16 + d
  826. }
  827. if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
  828. return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
  829. }
  830. return rune(r), nil
  831. }
  832. func (p *Parser) parseWhitespace(b []byte) []byte {
  833. // ws = *wschar
  834. // wschar = %x20 ; Space
  835. // wschar =/ %x09 ; Horizontal tab
  836. _, rest := scanWhitespace(b)
  837. return rest
  838. }
  839. //nolint:cyclop
  840. func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
  841. switch b[0] {
  842. case 'i':
  843. if !scanFollowsInf(b) {
  844. return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
  845. }
  846. return p.builder.Push(Node{
  847. Kind: Float,
  848. Data: b[:3],
  849. Raw: p.Range(b[:3]),
  850. }), b[3:], nil
  851. case 'n':
  852. if !scanFollowsNan(b) {
  853. return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
  854. }
  855. return p.builder.Push(Node{
  856. Kind: Float,
  857. Data: b[:3],
  858. Raw: p.Range(b[:3]),
  859. }), b[3:], nil
  860. case '+', '-':
  861. return p.scanIntOrFloat(b)
  862. }
  863. if len(b) < 3 {
  864. return p.scanIntOrFloat(b)
  865. }
  866. s := 5
  867. if len(b) < s {
  868. s = len(b)
  869. }
  870. for idx, c := range b[:s] {
  871. if isDigit(c) {
  872. continue
  873. }
  874. if idx == 2 && c == ':' || (idx == 4 && c == '-') {
  875. return p.scanDateTime(b)
  876. }
  877. break
  878. }
  879. return p.scanIntOrFloat(b)
  880. }
  881. func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
  882. // scans for contiguous characters in [0-9T:Z.+-], and up to one space if
  883. // followed by a digit.
  884. hasDate := false
  885. hasTime := false
  886. hasTz := false
  887. seenSpace := false
  888. i := 0
  889. byteLoop:
  890. for ; i < len(b); i++ {
  891. c := b[i]
  892. switch {
  893. case isDigit(c):
  894. case c == '-':
  895. hasDate = true
  896. const minOffsetOfTz = 8
  897. if i >= minOffsetOfTz {
  898. hasTz = true
  899. }
  900. case c == 'T' || c == 't' || c == ':' || c == '.':
  901. hasTime = true
  902. case c == '+' || c == '-' || c == 'Z' || c == 'z':
  903. hasTz = true
  904. case c == ' ':
  905. if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
  906. i += 2
  907. // Avoid reaching past the end of the document in case the time
  908. // is malformed. See TestIssue585.
  909. if i >= len(b) {
  910. i--
  911. }
  912. seenSpace = true
  913. hasTime = true
  914. } else {
  915. break byteLoop
  916. }
  917. default:
  918. break byteLoop
  919. }
  920. }
  921. var kind Kind
  922. if hasTime {
  923. if hasDate {
  924. if hasTz {
  925. kind = DateTime
  926. } else {
  927. kind = LocalDateTime
  928. }
  929. } else {
  930. kind = LocalTime
  931. }
  932. } else {
  933. kind = LocalDate
  934. }
  935. return p.builder.Push(Node{
  936. Kind: kind,
  937. Data: b[:i],
  938. }), b[i:], nil
  939. }
  940. //nolint:funlen,gocognit,cyclop
  941. func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
  942. i := 0
  943. if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
  944. var isValidRune validRuneFn
  945. switch b[1] {
  946. case 'x':
  947. isValidRune = isValidHexRune
  948. case 'o':
  949. isValidRune = isValidOctalRune
  950. case 'b':
  951. isValidRune = isValidBinaryRune
  952. default:
  953. i++
  954. }
  955. if isValidRune != nil {
  956. i += 2
  957. for ; i < len(b); i++ {
  958. if !isValidRune(b[i]) {
  959. break
  960. }
  961. }
  962. }
  963. return p.builder.Push(Node{
  964. Kind: Integer,
  965. Data: b[:i],
  966. Raw: p.Range(b[:i]),
  967. }), b[i:], nil
  968. }
  969. isFloat := false
  970. for ; i < len(b); i++ {
  971. c := b[i]
  972. if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
  973. continue
  974. }
  975. if c == '.' || c == 'e' || c == 'E' {
  976. isFloat = true
  977. continue
  978. }
  979. if c == 'i' {
  980. if scanFollowsInf(b[i:]) {
  981. return p.builder.Push(Node{
  982. Kind: Float,
  983. Data: b[:i+3],
  984. Raw: p.Range(b[:i+3]),
  985. }), b[i+3:], nil
  986. }
  987. return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
  988. }
  989. if c == 'n' {
  990. if scanFollowsNan(b[i:]) {
  991. return p.builder.Push(Node{
  992. Kind: Float,
  993. Data: b[:i+3],
  994. Raw: p.Range(b[:i+3]),
  995. }), b[i+3:], nil
  996. }
  997. return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
  998. }
  999. break
  1000. }
  1001. if i == 0 {
  1002. return invalidReference, b, NewParserError(b, "incomplete number")
  1003. }
  1004. kind := Integer
  1005. if isFloat {
  1006. kind = Float
  1007. }
  1008. return p.builder.Push(Node{
  1009. Kind: kind,
  1010. Data: b[:i],
  1011. Raw: p.Range(b[:i]),
  1012. }), b[i:], nil
  1013. }
  // isDigit reports whether r is an ASCII decimal digit.
  func isDigit(r byte) bool {
  	return '0' <= r && r <= '9'
  }
  // validRuneFn reports whether a byte is accepted as part of a number written
  // in a given base.
  type validRuneFn func(r byte) bool
  // isValidHexRune reports whether r is a hexadecimal digit (either case) or an
  // underscore.
  func isValidHexRune(r byte) bool {
  	switch {
  	case '0' <= r && r <= '9':
  		return true
  	case 'a' <= r && r <= 'f':
  		return true
  	case 'A' <= r && r <= 'F':
  		return true
  	default:
  		return r == '_'
  	}
  }
  // isValidOctalRune reports whether r is an octal digit or an underscore.
  func isValidOctalRune(r byte) bool {
  	if r == '_' {
  		return true
  	}

  	return '0' <= r && r <= '7'
  }
  // isValidBinaryRune reports whether r is a binary digit or an underscore.
  func isValidBinaryRune(r byte) bool {
  	switch r {
  	case '0', '1', '_':
  		return true
  	default:
  		return false
  	}
  }
  1030. func expect(x byte, b []byte) ([]byte, error) {
  1031. if len(b) == 0 {
  1032. return nil, NewParserError(b, "expected character %c but the document ended here", x)
  1033. }
  1034. if b[0] != x {
  1035. return nil, NewParserError(b[0:1], "expected character %c", x)
  1036. }
  1037. return b[1:], nil
  1038. }