12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245 |
- package unstable
- import (
- "bytes"
- "fmt"
- "unicode"
- "github.com/pelletier/go-toml/v2/internal/characters"
- "github.com/pelletier/go-toml/v2/internal/danger"
- )
// ParserError is the error reported for a problem tied to a specific part of
// the parsed document.
//
// Highlight is expected to alias the parser input, so a ParserError must not
// outlive the Parser it refers to; using it after the parser has been reset
// may cause panics.
type ParserError struct {
	// Highlight is the part of the document the error points at.
	Highlight []byte
	// Message is the text returned by Error.
	Message string
	// Key is optional.
	Key []string
}

// Error implements the error interface and returns the error message.
func (e *ParserError) Error() string {
	return e.Message
}
- // NewParserError is a convenience function to create a ParserError
- //
- // Warning: Highlight needs to be a subslice of Parser.data, so only slices
- // returned by Parser.Raw are valid candidates.
- func NewParserError(highlight []byte, format string, args ...interface{}) error {
- return &ParserError{
- Highlight: highlight,
- Message: fmt.Errorf(format, args...).Error(),
- }
- }
- // Parser scans over a TOML-encoded document and generates an iterative AST.
- //
- // To prime the Parser, first reset it with the contents of a TOML document.
- // Then, process all top-level expressions sequentially. See Example.
- //
- // Don't forget to check Error() after you're done parsing.
- //
- // Each top-level expression needs to be fully processed before calling
- // NextExpression() again. Otherwise, calls to various Node methods may panic if
- // the parser has moved on the next expression.
- //
- // For performance reasons, go-toml doesn't make a copy of the input bytes to
- // the parser. Make sure to copy all the bytes you need to outlive the slice
- // given to the parser.
- type Parser struct {
- data []byte
- builder builder
- ref reference
- left []byte
- err error
- first bool
- KeepComments bool
- }
- // Data returns the slice provided to the last call to Reset.
- func (p *Parser) Data() []byte {
- return p.data
- }
- // Range returns a range description that corresponds to a given slice of the
- // input. If the argument is not a subslice of the parser input, this function
- // panics.
- func (p *Parser) Range(b []byte) Range {
- return Range{
- Offset: uint32(danger.SubsliceOffset(p.data, b)),
- Length: uint32(len(b)),
- }
- }
- // Raw returns the slice corresponding to the bytes in the given range.
- func (p *Parser) Raw(raw Range) []byte {
- return p.data[raw.Offset : raw.Offset+raw.Length]
- }
- // Reset brings the parser to its initial state for a given input. It wipes an
- // reuses internal storage to reduce allocation.
- func (p *Parser) Reset(b []byte) {
- p.builder.Reset()
- p.ref = invalidReference
- p.data = b
- p.left = b
- p.err = nil
- p.first = true
- }
- // NextExpression parses the next top-level expression. If an expression was
- // successfully parsed, it returns true. If the parser is at the end of the
- // document or an error occurred, it returns false.
- //
- // Retrieve the parsed expression with Expression().
- func (p *Parser) NextExpression() bool {
- if len(p.left) == 0 || p.err != nil {
- return false
- }
- p.builder.Reset()
- p.ref = invalidReference
- for {
- if len(p.left) == 0 || p.err != nil {
- return false
- }
- if !p.first {
- p.left, p.err = p.parseNewline(p.left)
- }
- if len(p.left) == 0 || p.err != nil {
- return false
- }
- p.ref, p.left, p.err = p.parseExpression(p.left)
- if p.err != nil {
- return false
- }
- p.first = false
- if p.ref.Valid() {
- return true
- }
- }
- }
- // Expression returns a pointer to the node representing the last successfully
- // parsed expression.
- func (p *Parser) Expression() *Node {
- return p.builder.NodeAt(p.ref)
- }
- // Error returns any error that has occurred during parsing.
- func (p *Parser) Error() error {
- return p.err
- }
// Position identifies a location in the input.
type Position struct {
	// Offset is the number of bytes from the beginning of the input.
	Offset int
	// Line is the line number, starting at 1.
	Line int
	// Column is the column number, starting at 1.
	Column int
}
- // Shape describes the position of a range in the input.
- type Shape struct {
- Start Position
- End Position
- }
- func (p *Parser) position(b []byte) Position {
- offset := danger.SubsliceOffset(p.data, b)
- lead := p.data[:offset]
- return Position{
- Offset: offset,
- Line: bytes.Count(lead, []byte{'\n'}) + 1,
- Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}),
- }
- }
- // Shape returns the shape of the given range in the input. Will
- // panic if the range is not a subslice of the input.
- func (p *Parser) Shape(r Range) Shape {
- raw := p.Raw(r)
- return Shape{
- Start: p.position(raw),
- End: p.position(raw[r.Length:]),
- }
- }
- func (p *Parser) parseNewline(b []byte) ([]byte, error) {
- if b[0] == '\n' {
- return b[1:], nil
- }
- if b[0] == '\r' {
- _, rest, err := scanWindowsNewline(b)
- return rest, err
- }
- return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0])
- }
- func (p *Parser) parseComment(b []byte) (reference, []byte, error) {
- ref := invalidReference
- data, rest, err := scanComment(b)
- if p.KeepComments && err == nil {
- ref = p.builder.Push(Node{
- Kind: Comment,
- Raw: p.Range(data),
- Data: data,
- })
- }
- return ref, rest, err
- }
- func (p *Parser) parseExpression(b []byte) (reference, []byte, error) {
- // expression = ws [ comment ]
- // expression =/ ws keyval ws [ comment ]
- // expression =/ ws table ws [ comment ]
- ref := invalidReference
- b = p.parseWhitespace(b)
- if len(b) == 0 {
- return ref, b, nil
- }
- if b[0] == '#' {
- ref, rest, err := p.parseComment(b)
- return ref, rest, err
- }
- if b[0] == '\n' || b[0] == '\r' {
- return ref, b, nil
- }
- var err error
- if b[0] == '[' {
- ref, b, err = p.parseTable(b)
- } else {
- ref, b, err = p.parseKeyval(b)
- }
- if err != nil {
- return ref, nil, err
- }
- b = p.parseWhitespace(b)
- if len(b) > 0 && b[0] == '#' {
- cref, rest, err := p.parseComment(b)
- if cref != invalidReference {
- p.builder.Chain(ref, cref)
- }
- return ref, rest, err
- }
- return ref, b, nil
- }
- func (p *Parser) parseTable(b []byte) (reference, []byte, error) {
- // table = std-table / array-table
- if len(b) > 1 && b[1] == '[' {
- return p.parseArrayTable(b)
- }
- return p.parseStdTable(b)
- }
- func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) {
- // array-table = array-table-open key array-table-close
- // array-table-open = %x5B.5B ws ; [[ Double left square bracket
- // array-table-close = ws %x5D.5D ; ]] Double right square bracket
- ref := p.builder.Push(Node{
- Kind: ArrayTable,
- })
- b = b[2:]
- b = p.parseWhitespace(b)
- k, b, err := p.parseKey(b)
- if err != nil {
- return ref, nil, err
- }
- p.builder.AttachChild(ref, k)
- b = p.parseWhitespace(b)
- b, err = expect(']', b)
- if err != nil {
- return ref, nil, err
- }
- b, err = expect(']', b)
- return ref, b, err
- }
- func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) {
- // std-table = std-table-open key std-table-close
- // std-table-open = %x5B ws ; [ Left square bracket
- // std-table-close = ws %x5D ; ] Right square bracket
- ref := p.builder.Push(Node{
- Kind: Table,
- })
- b = b[1:]
- b = p.parseWhitespace(b)
- key, b, err := p.parseKey(b)
- if err != nil {
- return ref, nil, err
- }
- p.builder.AttachChild(ref, key)
- b = p.parseWhitespace(b)
- b, err = expect(']', b)
- return ref, b, err
- }
- func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) {
- // keyval = key keyval-sep val
- ref := p.builder.Push(Node{
- Kind: KeyValue,
- })
- key, b, err := p.parseKey(b)
- if err != nil {
- return invalidReference, nil, err
- }
- // keyval-sep = ws %x3D ws ; =
- b = p.parseWhitespace(b)
- if len(b) == 0 {
- return invalidReference, nil, NewParserError(b, "expected = after a key, but the document ends there")
- }
- b, err = expect('=', b)
- if err != nil {
- return invalidReference, nil, err
- }
- b = p.parseWhitespace(b)
- valRef, b, err := p.parseVal(b)
- if err != nil {
- return ref, b, err
- }
- p.builder.Chain(valRef, key)
- p.builder.AttachChild(ref, valRef)
- return ref, b, err
- }
- //nolint:cyclop,funlen
- func (p *Parser) parseVal(b []byte) (reference, []byte, error) {
- // val = string / boolean / array / inline-table / date-time / float / integer
- ref := invalidReference
- if len(b) == 0 {
- return ref, nil, NewParserError(b, "expected value, not eof")
- }
- var err error
- c := b[0]
- switch c {
- case '"':
- var raw []byte
- var v []byte
- if scanFollowsMultilineBasicStringDelimiter(b) {
- raw, v, b, err = p.parseMultilineBasicString(b)
- } else {
- raw, v, b, err = p.parseBasicString(b)
- }
- if err == nil {
- ref = p.builder.Push(Node{
- Kind: String,
- Raw: p.Range(raw),
- Data: v,
- })
- }
- return ref, b, err
- case '\'':
- var raw []byte
- var v []byte
- if scanFollowsMultilineLiteralStringDelimiter(b) {
- raw, v, b, err = p.parseMultilineLiteralString(b)
- } else {
- raw, v, b, err = p.parseLiteralString(b)
- }
- if err == nil {
- ref = p.builder.Push(Node{
- Kind: String,
- Raw: p.Range(raw),
- Data: v,
- })
- }
- return ref, b, err
- case 't':
- if !scanFollowsTrue(b) {
- return ref, nil, NewParserError(atmost(b, 4), "expected 'true'")
- }
- ref = p.builder.Push(Node{
- Kind: Bool,
- Data: b[:4],
- })
- return ref, b[4:], nil
- case 'f':
- if !scanFollowsFalse(b) {
- return ref, nil, NewParserError(atmost(b, 5), "expected 'false'")
- }
- ref = p.builder.Push(Node{
- Kind: Bool,
- Data: b[:5],
- })
- return ref, b[5:], nil
- case '[':
- return p.parseValArray(b)
- case '{':
- return p.parseInlineTable(b)
- default:
- return p.parseIntOrFloatOrDateTime(b)
- }
- }
// atmost returns the first n bytes of b, or b itself when it holds n bytes
// or fewer.
func atmost(b []byte, n int) []byte {
	if len(b) > n {
		return b[:n]
	}

	return b
}
- func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) {
- v, rest, err := scanLiteralString(b)
- if err != nil {
- return nil, nil, nil, err
- }
- return v, v[1 : len(v)-1], rest, nil
- }
- func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) {
- // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close
- // inline-table-open = %x7B ws ; {
- // inline-table-close = ws %x7D ; }
- // inline-table-sep = ws %x2C ws ; , Comma
- // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ]
- parent := p.builder.Push(Node{
- Kind: InlineTable,
- Raw: p.Range(b[:1]),
- })
- first := true
- var child reference
- b = b[1:]
- var err error
- for len(b) > 0 {
- previousB := b
- b = p.parseWhitespace(b)
- if len(b) == 0 {
- return parent, nil, NewParserError(previousB[:1], "inline table is incomplete")
- }
- if b[0] == '}' {
- break
- }
- if !first {
- b, err = expect(',', b)
- if err != nil {
- return parent, nil, err
- }
- b = p.parseWhitespace(b)
- }
- var kv reference
- kv, b, err = p.parseKeyval(b)
- if err != nil {
- return parent, nil, err
- }
- if first {
- p.builder.AttachChild(parent, kv)
- } else {
- p.builder.Chain(child, kv)
- }
- child = kv
- first = false
- }
- rest, err := expect('}', b)
- return parent, rest, err
- }
- //nolint:funlen,cyclop
- func (p *Parser) parseValArray(b []byte) (reference, []byte, error) {
- // array = array-open [ array-values ] ws-comment-newline array-close
- // array-open = %x5B ; [
- // array-close = %x5D ; ]
- // array-values = ws-comment-newline val ws-comment-newline array-sep array-values
- // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ]
- // array-sep = %x2C ; , Comma
- // ws-comment-newline = *( wschar / [ comment ] newline )
- arrayStart := b
- b = b[1:]
- parent := p.builder.Push(Node{
- Kind: Array,
- })
- // First indicates whether the parser is looking for the first element
- // (non-comment) of the array.
- first := true
- lastChild := invalidReference
- addChild := func(valueRef reference) {
- if lastChild == invalidReference {
- p.builder.AttachChild(parent, valueRef)
- } else {
- p.builder.Chain(lastChild, valueRef)
- }
- lastChild = valueRef
- }
- var err error
- for len(b) > 0 {
- cref := invalidReference
- cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
- if err != nil {
- return parent, nil, err
- }
- if cref != invalidReference {
- addChild(cref)
- }
- if len(b) == 0 {
- return parent, nil, NewParserError(arrayStart[:1], "array is incomplete")
- }
- if b[0] == ']' {
- break
- }
- if b[0] == ',' {
- if first {
- return parent, nil, NewParserError(b[0:1], "array cannot start with comma")
- }
- b = b[1:]
- cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
- if err != nil {
- return parent, nil, err
- }
- if cref != invalidReference {
- addChild(cref)
- }
- } else if !first {
- return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas")
- }
- // TOML allows trailing commas in arrays.
- if len(b) > 0 && b[0] == ']' {
- break
- }
- var valueRef reference
- valueRef, b, err = p.parseVal(b)
- if err != nil {
- return parent, nil, err
- }
- addChild(valueRef)
- cref, b, err = p.parseOptionalWhitespaceCommentNewline(b)
- if err != nil {
- return parent, nil, err
- }
- if cref != invalidReference {
- addChild(cref)
- }
- first = false
- }
- rest, err := expect(']', b)
- return parent, rest, err
- }
- func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) {
- rootCommentRef := invalidReference
- latestCommentRef := invalidReference
- addComment := func(ref reference) {
- if rootCommentRef == invalidReference {
- rootCommentRef = ref
- } else if latestCommentRef == invalidReference {
- p.builder.AttachChild(rootCommentRef, ref)
- latestCommentRef = ref
- } else {
- p.builder.Chain(latestCommentRef, ref)
- latestCommentRef = ref
- }
- }
- for len(b) > 0 {
- var err error
- b = p.parseWhitespace(b)
- if len(b) > 0 && b[0] == '#' {
- var ref reference
- ref, b, err = p.parseComment(b)
- if err != nil {
- return invalidReference, nil, err
- }
- if ref != invalidReference {
- addComment(ref)
- }
- }
- if len(b) == 0 {
- break
- }
- if b[0] == '\n' || b[0] == '\r' {
- b, err = p.parseNewline(b)
- if err != nil {
- return invalidReference, nil, err
- }
- } else {
- break
- }
- }
- return rootCommentRef, b, nil
- }
- func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) {
- token, rest, err := scanMultilineLiteralString(b)
- if err != nil {
- return nil, nil, nil, err
- }
- i := 3
- // skip the immediate new line
- if token[i] == '\n' {
- i++
- } else if token[i] == '\r' && token[i+1] == '\n' {
- i += 2
- }
- return token, token[i : len(token)-3], rest, err
- }
- //nolint:funlen,gocognit,cyclop
- func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) {
- // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
- // ml-basic-string-delim
- // ml-basic-string-delim = 3quotation-mark
- // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
- //
- // mlb-content = mlb-char / newline / mlb-escaped-nl
- // mlb-char = mlb-unescaped / escaped
- // mlb-quotes = 1*2quotation-mark
- // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
- // mlb-escaped-nl = escape ws newline *( wschar / newline )
- token, escaped, rest, err := scanMultilineBasicString(b)
- if err != nil {
- return nil, nil, nil, err
- }
- i := 3
- // skip the immediate new line
- if token[i] == '\n' {
- i++
- } else if token[i] == '\r' && token[i+1] == '\n' {
- i += 2
- }
- // fast path
- startIdx := i
- endIdx := len(token) - len(`"""`)
- if !escaped {
- str := token[startIdx:endIdx]
- verr := characters.Utf8TomlValidAlreadyEscaped(str)
- if verr.Zero() {
- return token, str, rest, nil
- }
- return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
- }
- var builder bytes.Buffer
- // The scanner ensures that the token starts and ends with quotes and that
- // escapes are balanced.
- for i < len(token)-3 {
- c := token[i]
- //nolint:nestif
- if c == '\\' {
- // When the last non-whitespace character on a line is an unescaped \,
- // it will be trimmed along with all whitespace (including newlines) up
- // to the next non-whitespace character or closing delimiter.
- isLastNonWhitespaceOnLine := false
- j := 1
- findEOLLoop:
- for ; j < len(token)-3-i; j++ {
- switch token[i+j] {
- case ' ', '\t':
- continue
- case '\r':
- if token[i+j+1] == '\n' {
- continue
- }
- case '\n':
- isLastNonWhitespaceOnLine = true
- }
- break findEOLLoop
- }
- if isLastNonWhitespaceOnLine {
- i += j
- for ; i < len(token)-3; i++ {
- c := token[i]
- if !(c == '\n' || c == '\r' || c == ' ' || c == '\t') {
- i--
- break
- }
- }
- i++
- continue
- }
- // handle escaping
- i++
- c = token[i]
- switch c {
- case '"', '\\':
- builder.WriteByte(c)
- case 'b':
- builder.WriteByte('\b')
- case 'f':
- builder.WriteByte('\f')
- case 'n':
- builder.WriteByte('\n')
- case 'r':
- builder.WriteByte('\r')
- case 't':
- builder.WriteByte('\t')
- case 'e':
- builder.WriteByte(0x1B)
- case 'u':
- x, err := hexToRune(atmost(token[i+1:], 4), 4)
- if err != nil {
- return nil, nil, nil, err
- }
- builder.WriteRune(x)
- i += 4
- case 'U':
- x, err := hexToRune(atmost(token[i+1:], 8), 8)
- if err != nil {
- return nil, nil, nil, err
- }
- builder.WriteRune(x)
- i += 8
- default:
- return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
- }
- i++
- } else {
- size := characters.Utf8ValidNext(token[i:])
- if size == 0 {
- return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
- }
- builder.Write(token[i : i+size])
- i += size
- }
- }
- return token, builder.Bytes(), rest, nil
- }
- func (p *Parser) parseKey(b []byte) (reference, []byte, error) {
- // key = simple-key / dotted-key
- // simple-key = quoted-key / unquoted-key
- //
- // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
- // quoted-key = basic-string / literal-string
- // dotted-key = simple-key 1*( dot-sep simple-key )
- //
- // dot-sep = ws %x2E ws ; . Period
- raw, key, b, err := p.parseSimpleKey(b)
- if err != nil {
- return invalidReference, nil, err
- }
- ref := p.builder.Push(Node{
- Kind: Key,
- Raw: p.Range(raw),
- Data: key,
- })
- for {
- b = p.parseWhitespace(b)
- if len(b) > 0 && b[0] == '.' {
- b = p.parseWhitespace(b[1:])
- raw, key, b, err = p.parseSimpleKey(b)
- if err != nil {
- return ref, nil, err
- }
- p.builder.PushAndChain(Node{
- Kind: Key,
- Raw: p.Range(raw),
- Data: key,
- })
- } else {
- break
- }
- }
- return ref, b, nil
- }
- func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) {
- if len(b) == 0 {
- return nil, nil, nil, NewParserError(b, "expected key but found none")
- }
- // simple-key = quoted-key / unquoted-key
- // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
- // quoted-key = basic-string / literal-string
- switch {
- case b[0] == '\'':
- return p.parseLiteralString(b)
- case b[0] == '"':
- return p.parseBasicString(b)
- case isUnquotedKeyChar(b[0]):
- key, rest = scanUnquotedKey(b)
- return key, key, rest, nil
- default:
- return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0])
- }
- }
- //nolint:funlen,cyclop
- func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) {
- // basic-string = quotation-mark *basic-char quotation-mark
- // quotation-mark = %x22 ; "
- // basic-char = basic-unescaped / escaped
- // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
- // escaped = escape escape-seq-char
- // escape-seq-char = %x22 ; " quotation mark U+0022
- // escape-seq-char =/ %x5C ; \ reverse solidus U+005C
- // escape-seq-char =/ %x62 ; b backspace U+0008
- // escape-seq-char =/ %x66 ; f form feed U+000C
- // escape-seq-char =/ %x6E ; n line feed U+000A
- // escape-seq-char =/ %x72 ; r carriage return U+000D
- // escape-seq-char =/ %x74 ; t tab U+0009
- // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX
- // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX
- token, escaped, rest, err := scanBasicString(b)
- if err != nil {
- return nil, nil, nil, err
- }
- startIdx := len(`"`)
- endIdx := len(token) - len(`"`)
- // Fast path. If there is no escape sequence, the string should just be
- // an UTF-8 encoded string, which is the same as Go. In that case,
- // validate the string and return a direct reference to the buffer.
- if !escaped {
- str := token[startIdx:endIdx]
- verr := characters.Utf8TomlValidAlreadyEscaped(str)
- if verr.Zero() {
- return token, str, rest, nil
- }
- return nil, nil, nil, NewParserError(str[verr.Index:verr.Index+verr.Size], "invalid UTF-8")
- }
- i := startIdx
- var builder bytes.Buffer
- // The scanner ensures that the token starts and ends with quotes and that
- // escapes are balanced.
- for i < len(token)-1 {
- c := token[i]
- if c == '\\' {
- i++
- c = token[i]
- switch c {
- case '"', '\\':
- builder.WriteByte(c)
- case 'b':
- builder.WriteByte('\b')
- case 'f':
- builder.WriteByte('\f')
- case 'n':
- builder.WriteByte('\n')
- case 'r':
- builder.WriteByte('\r')
- case 't':
- builder.WriteByte('\t')
- case 'e':
- builder.WriteByte(0x1B)
- case 'u':
- x, err := hexToRune(token[i+1:len(token)-1], 4)
- if err != nil {
- return nil, nil, nil, err
- }
- builder.WriteRune(x)
- i += 4
- case 'U':
- x, err := hexToRune(token[i+1:len(token)-1], 8)
- if err != nil {
- return nil, nil, nil, err
- }
- builder.WriteRune(x)
- i += 8
- default:
- return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c)
- }
- i++
- } else {
- size := characters.Utf8ValidNext(token[i:])
- if size == 0 {
- return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c)
- }
- builder.Write(token[i : i+size])
- i += size
- }
- }
- return token, builder.Bytes(), rest, nil
- }
- func hexToRune(b []byte, length int) (rune, error) {
- if len(b) < length {
- return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b))
- }
- b = b[:length]
- var r uint32
- for i, c := range b {
- d := uint32(0)
- switch {
- case '0' <= c && c <= '9':
- d = uint32(c - '0')
- case 'a' <= c && c <= 'f':
- d = uint32(c - 'a' + 10)
- case 'A' <= c && c <= 'F':
- d = uint32(c - 'A' + 10)
- default:
- return -1, NewParserError(b[i:i+1], "non-hex character")
- }
- r = r*16 + d
- }
- if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 {
- return -1, NewParserError(b, "escape sequence is invalid Unicode code point")
- }
- return rune(r), nil
- }
- func (p *Parser) parseWhitespace(b []byte) []byte {
- // ws = *wschar
- // wschar = %x20 ; Space
- // wschar =/ %x09 ; Horizontal tab
- _, rest := scanWhitespace(b)
- return rest
- }
- //nolint:cyclop
- func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) {
- switch b[0] {
- case 'i':
- if !scanFollowsInf(b) {
- return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'")
- }
- return p.builder.Push(Node{
- Kind: Float,
- Data: b[:3],
- Raw: p.Range(b[:3]),
- }), b[3:], nil
- case 'n':
- if !scanFollowsNan(b) {
- return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'")
- }
- return p.builder.Push(Node{
- Kind: Float,
- Data: b[:3],
- Raw: p.Range(b[:3]),
- }), b[3:], nil
- case '+', '-':
- return p.scanIntOrFloat(b)
- }
- if len(b) < 3 {
- return p.scanIntOrFloat(b)
- }
- s := 5
- if len(b) < s {
- s = len(b)
- }
- for idx, c := range b[:s] {
- if isDigit(c) {
- continue
- }
- if idx == 2 && c == ':' || (idx == 4 && c == '-') {
- return p.scanDateTime(b)
- }
- break
- }
- return p.scanIntOrFloat(b)
- }
- func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) {
- // scans for contiguous characters in [0-9T:Z.+-], and up to one space if
- // followed by a digit.
- hasDate := false
- hasTime := false
- hasTz := false
- seenSpace := false
- i := 0
- byteLoop:
- for ; i < len(b); i++ {
- c := b[i]
- switch {
- case isDigit(c):
- case c == '-':
- hasDate = true
- const minOffsetOfTz = 8
- if i >= minOffsetOfTz {
- hasTz = true
- }
- case c == 'T' || c == 't' || c == ':' || c == '.':
- hasTime = true
- case c == '+' || c == '-' || c == 'Z' || c == 'z':
- hasTz = true
- case c == ' ':
- if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) {
- i += 2
- // Avoid reaching past the end of the document in case the time
- // is malformed. See TestIssue585.
- if i >= len(b) {
- i--
- }
- seenSpace = true
- hasTime = true
- } else {
- break byteLoop
- }
- default:
- break byteLoop
- }
- }
- var kind Kind
- if hasTime {
- if hasDate {
- if hasTz {
- kind = DateTime
- } else {
- kind = LocalDateTime
- }
- } else {
- kind = LocalTime
- }
- } else {
- kind = LocalDate
- }
- return p.builder.Push(Node{
- Kind: kind,
- Data: b[:i],
- }), b[i:], nil
- }
- //nolint:funlen,gocognit,cyclop
- func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) {
- i := 0
- if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' {
- var isValidRune validRuneFn
- switch b[1] {
- case 'x':
- isValidRune = isValidHexRune
- case 'o':
- isValidRune = isValidOctalRune
- case 'b':
- isValidRune = isValidBinaryRune
- default:
- i++
- }
- if isValidRune != nil {
- i += 2
- for ; i < len(b); i++ {
- if !isValidRune(b[i]) {
- break
- }
- }
- }
- return p.builder.Push(Node{
- Kind: Integer,
- Data: b[:i],
- Raw: p.Range(b[:i]),
- }), b[i:], nil
- }
- isFloat := false
- for ; i < len(b); i++ {
- c := b[i]
- if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' {
- continue
- }
- if c == '.' || c == 'e' || c == 'E' {
- isFloat = true
- continue
- }
- if c == 'i' {
- if scanFollowsInf(b[i:]) {
- return p.builder.Push(Node{
- Kind: Float,
- Data: b[:i+3],
- Raw: p.Range(b[:i+3]),
- }), b[i+3:], nil
- }
- return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number")
- }
- if c == 'n' {
- if scanFollowsNan(b[i:]) {
- return p.builder.Push(Node{
- Kind: Float,
- Data: b[:i+3],
- Raw: p.Range(b[:i+3]),
- }), b[i+3:], nil
- }
- return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number")
- }
- break
- }
- if i == 0 {
- return invalidReference, b, NewParserError(b, "incomplete number")
- }
- kind := Integer
- if isFloat {
- kind = Float
- }
- return p.builder.Push(Node{
- Kind: kind,
- Data: b[:i],
- Raw: p.Range(b[:i]),
- }), b[i:], nil
- }
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r byte) bool {
	return '0' <= r && r <= '9'
}
// validRuneFn reports whether the byte r is acceptable in the kind of number
// being scanned.
type validRuneFn func(r byte) bool
// isValidHexRune reports whether r may appear in a hexadecimal integer: a
// hexadecimal digit in either case, or an underscore.
func isValidHexRune(r byte) bool {
	switch {
	case r >= '0' && r <= '9',
		r >= 'a' && r <= 'f',
		r >= 'A' && r <= 'F',
		r == '_':
		return true
	}

	return false
}
// isValidOctalRune reports whether r may appear in an octal integer: a digit
// from 0 to 7, or an underscore.
func isValidOctalRune(r byte) bool {
	return r == '_' || ('0' <= r && r <= '7')
}
// isValidBinaryRune reports whether r may appear in a binary integer: '0',
// '1', or an underscore.
func isValidBinaryRune(r byte) bool {
	switch r {
	case '0', '1', '_':
		return true
	}

	return false
}
- func expect(x byte, b []byte) ([]byte, error) {
- if len(b) == 0 {
- return nil, NewParserError(b, "expected character %c but the document ended here", x)
- }
- if b[0] != x {
- return nil, NewParserError(b[0:1], "expected character %c", x)
- }
- return b[1:], nil
- }
|