123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- package unstable
- import "github.com/pelletier/go-toml/v2/internal/characters"
// scanFollows reports whether b begins with the bytes of pattern.
//
// An empty pattern matches any input, including an empty or nil b.
func scanFollows(b []byte, pattern string) bool {
	if len(b) < len(pattern) {
		// Not enough input left to hold the pattern.
		return false
	}
	prefix := b[:len(pattern)]
	return string(prefix) == pattern
}
- func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
- return scanFollows(b, `"""`)
- }
- func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
- return scanFollows(b, `'''`)
- }
- func scanFollowsTrue(b []byte) bool {
- return scanFollows(b, `true`)
- }
- func scanFollowsFalse(b []byte) bool {
- return scanFollows(b, `false`)
- }
- func scanFollowsInf(b []byte) bool {
- return scanFollows(b, `inf`)
- }
- func scanFollowsNan(b []byte) bool {
- return scanFollows(b, `nan`)
- }
- func scanUnquotedKey(b []byte) ([]byte, []byte) {
- // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
- for i := 0; i < len(b); i++ {
- if !isUnquotedKeyChar(b[i]) {
- return b[:i], b[i:]
- }
- }
- return b, b[len(b):]
- }
// isUnquotedKeyChar reports whether r is an ASCII letter, an ASCII digit, a
// hyphen, or an underscore.
func isUnquotedKeyChar(r byte) bool {
	switch {
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	default:
		return false
	}
}
- func scanLiteralString(b []byte) ([]byte, []byte, error) {
- // literal-string = apostrophe *literal-char apostrophe
- // apostrophe = %x27 ; ' apostrophe
- // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
- for i := 1; i < len(b); {
- switch b[i] {
- case '\'':
- return b[:i+1], b[i+1:], nil
- case '\n', '\r':
- return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
- }
- size := characters.Utf8ValidNext(b[i:])
- if size == 0 {
- return nil, nil, NewParserError(b[i:i+1], "invalid character")
- }
- i += size
- }
- return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
- }
- func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
- // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
- // ml-literal-string-delim
- // ml-literal-string-delim = 3apostrophe
- // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
- //
- // mll-content = mll-char / newline
- // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
- // mll-quotes = 1*2apostrophe
- for i := 3; i < len(b); {
- switch b[i] {
- case '\'':
- if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
- i += 3
- // At that point we found 3 apostrophe, and i is the
- // index of the byte after the third one. The scanner
- // needs to be eager, because there can be an extra 2
- // apostrophe that can be accepted at the end of the
- // string.
- if i >= len(b) || b[i] != '\'' {
- return b[:i], b[i:], nil
- }
- i++
- if i >= len(b) || b[i] != '\'' {
- return b[:i], b[i:], nil
- }
- i++
- if i < len(b) && b[i] == '\'' {
- return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
- }
- return b[:i], b[i:], nil
- }
- case '\r':
- if len(b) < i+2 {
- return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
- }
- if b[i+1] != '\n' {
- return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
- }
- i += 2 // skip the \n
- continue
- }
- size := characters.Utf8ValidNext(b[i:])
- if size == 0 {
- return nil, nil, NewParserError(b[i:i+1], "invalid character")
- }
- i += size
- }
- return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
- }
- func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
- const lenCRLF = 2
- if len(b) < lenCRLF {
- return nil, nil, NewParserError(b, "windows new line expected")
- }
- if b[1] != '\n' {
- return nil, nil, NewParserError(b, `windows new line should be \r\n`)
- }
- return b[:lenCRLF], b[lenCRLF:], nil
- }
// scanWhitespace splits b after its leading run of spaces and tabs.
//
// It returns that run (possibly empty) followed by the remainder of b. Both
// results are sub-slices of b.
func scanWhitespace(b []byte) ([]byte, []byte) {
	end := 0
	for end < len(b) && (b[end] == ' ' || b[end] == '\t') {
		end++
	}
	return b[:end], b[end:]
}
- func scanComment(b []byte) ([]byte, []byte, error) {
- // comment-start-symbol = %x23 ; #
- // non-ascii = %x80-D7FF / %xE000-10FFFF
- // non-eol = %x09 / %x20-7F / non-ascii
- //
- // comment = comment-start-symbol *non-eol
- for i := 1; i < len(b); {
- if b[i] == '\n' {
- return b[:i], b[i:], nil
- }
- if b[i] == '\r' {
- if i+1 < len(b) && b[i+1] == '\n' {
- return b[:i+1], b[i+1:], nil
- }
- return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
- }
- size := characters.Utf8ValidNext(b[i:])
- if size == 0 {
- return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
- }
- i += size
- }
- return b, b[len(b):], nil
- }
- func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
- // basic-string = quotation-mark *basic-char quotation-mark
- // quotation-mark = %x22 ; "
- // basic-char = basic-unescaped / escaped
- // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
- // escaped = escape escape-seq-char
- escaped := false
- i := 1
- for ; i < len(b); i++ {
- switch b[i] {
- case '"':
- return b[:i+1], escaped, b[i+1:], nil
- case '\n', '\r':
- return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
- case '\\':
- if len(b) < i+2 {
- return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
- }
- escaped = true
- i++ // skip the next character
- }
- }
- return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
- }
- func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
- // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
- // ml-basic-string-delim
- // ml-basic-string-delim = 3quotation-mark
- // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
- //
- // mlb-content = mlb-char / newline / mlb-escaped-nl
- // mlb-char = mlb-unescaped / escaped
- // mlb-quotes = 1*2quotation-mark
- // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
- // mlb-escaped-nl = escape ws newline *( wschar / newline )
- escaped := false
- i := 3
- for ; i < len(b); i++ {
- switch b[i] {
- case '"':
- if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
- i += 3
- // At that point we found 3 apostrophe, and i is the
- // index of the byte after the third one. The scanner
- // needs to be eager, because there can be an extra 2
- // apostrophe that can be accepted at the end of the
- // string.
- if i >= len(b) || b[i] != '"' {
- return b[:i], escaped, b[i:], nil
- }
- i++
- if i >= len(b) || b[i] != '"' {
- return b[:i], escaped, b[i:], nil
- }
- i++
- if i < len(b) && b[i] == '"' {
- return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
- }
- return b[:i], escaped, b[i:], nil
- }
- case '\\':
- if len(b) < i+2 {
- return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
- }
- escaped = true
- i++ // skip the next character
- case '\r':
- if len(b) < i+2 {
- return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
- }
- if b[i+1] != '\n' {
- return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
- }
- i++ // skip the \n
- }
- }
- return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
- }
|