parser.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. package expr
  2. import (
  3. `strconv`
  4. `unicode`
  5. `unsafe`
  6. )
  7. type _TokenKind uint8
  8. const (
  9. _T_end _TokenKind = iota + 1
  10. _T_int
  11. _T_punc
  12. _T_name
  13. )
  14. const (
  15. _OP2 = 0x80
  16. _POW = _OP2 | '*'
  17. _SHL = _OP2 | '<'
  18. _SHR = _OP2 | '>'
  19. )
  20. type _Slice struct {
  21. p unsafe.Pointer
  22. n int
  23. c int
  24. }
  25. type _Token struct {
  26. pos int
  27. ptr *rune
  28. u64 uint64
  29. tag _TokenKind
  30. }
  31. func (self _Token) str() (v string) {
  32. return string(self.rbuf())
  33. }
  34. func (self _Token) rbuf() (v []rune) {
  35. (*_Slice)(unsafe.Pointer(&v)).c = int(self.u64)
  36. (*_Slice)(unsafe.Pointer(&v)).n = int(self.u64)
  37. (*_Slice)(unsafe.Pointer(&v)).p = unsafe.Pointer(self.ptr)
  38. return
  39. }
  40. func tokenEnd(p int) _Token {
  41. return _Token {
  42. pos: p,
  43. tag: _T_end,
  44. }
  45. }
  46. func tokenInt(p int, v uint64) _Token {
  47. return _Token {
  48. pos: p,
  49. u64: v,
  50. tag: _T_int,
  51. }
  52. }
  53. func tokenPunc(p int, v rune) _Token {
  54. return _Token {
  55. pos: p,
  56. tag: _T_punc,
  57. u64: uint64(v),
  58. }
  59. }
  60. func tokenName(p int, v []rune) _Token {
  61. return _Token {
  62. pos: p,
  63. ptr: &v[0],
  64. tag: _T_name,
  65. u64: uint64(len(v)),
  66. }
  67. }
  68. // Repository represents a repository of Term's.
  69. type Repository interface {
  70. Get(name string) (Term, error)
  71. }
  72. // Parser parses an expression string to it's AST representation.
  73. type Parser struct {
  74. pos int
  75. src []rune
  76. }
  77. var binaryOps = [...]func(*Expr, *Expr) *Expr {
  78. '+' : (*Expr).Add,
  79. '-' : (*Expr).Sub,
  80. '*' : (*Expr).Mul,
  81. '/' : (*Expr).Div,
  82. '%' : (*Expr).Mod,
  83. '&' : (*Expr).And,
  84. '^' : (*Expr).Xor,
  85. '|' : (*Expr).Or,
  86. _SHL : (*Expr).Shl,
  87. _SHR : (*Expr).Shr,
  88. _POW : (*Expr).Pow,
  89. }
  90. var precedence = [...]map[int]bool {
  91. {_SHL: true, _SHR: true},
  92. {'|' : true},
  93. {'^' : true},
  94. {'&' : true},
  95. {'+' : true, '-': true},
  96. {'*' : true, '/': true, '%': true},
  97. {_POW: true},
  98. }
  99. func (self *Parser) ch() rune {
  100. return self.src[self.pos]
  101. }
  102. func (self *Parser) eof() bool {
  103. return self.pos >= len(self.src)
  104. }
  105. func (self *Parser) rch() (v rune) {
  106. v, self.pos = self.src[self.pos], self.pos + 1
  107. return
  108. }
  109. func (self *Parser) hex(ss []rune) bool {
  110. if len(ss) == 1 && ss[0] == '0' {
  111. return unicode.ToLower(self.ch()) == 'x'
  112. } else if len(ss) <= 1 || unicode.ToLower(ss[1]) != 'x' {
  113. return unicode.IsDigit(self.ch())
  114. } else {
  115. return ishexdigit(self.ch())
  116. }
  117. }
  118. func (self *Parser) int(p int, ss []rune) (_Token, error) {
  119. var err error
  120. var val uint64
  121. /* find all the digits */
  122. for !self.eof() && self.hex(ss) {
  123. ss = append(ss, self.rch())
  124. }
  125. /* parse the value */
  126. if val, err = strconv.ParseUint(string(ss), 0, 64); err != nil {
  127. return _Token{}, err
  128. } else {
  129. return tokenInt(p, val), nil
  130. }
  131. }
  132. func (self *Parser) name(p int, ss []rune) _Token {
  133. for !self.eof() && isident(self.ch()) { ss = append(ss, self.rch()) }
  134. return tokenName(p, ss)
  135. }
  136. func (self *Parser) read(p int, ch rune) (_Token, error) {
  137. if isdigit(ch) {
  138. return self.int(p, []rune { ch })
  139. } else if isident0(ch) {
  140. return self.name(p, []rune { ch }), nil
  141. } else if isop2ch(ch) && !self.eof() && self.ch() == ch {
  142. return tokenPunc(p, _OP2 | self.rch()), nil
  143. } else if isop1ch(ch) {
  144. return tokenPunc(p, ch), nil
  145. } else {
  146. return _Token{}, newSyntaxError(self.pos, "invalid character " + strconv.QuoteRuneToASCII(ch))
  147. }
  148. }
  149. func (self *Parser) next() (_Token, error) {
  150. for {
  151. var p int
  152. var c rune
  153. /* check for EOF */
  154. if self.eof() {
  155. return tokenEnd(self.pos), nil
  156. }
  157. /* read the next char */
  158. p = self.pos
  159. c = self.rch()
  160. /* parse the token if not a space */
  161. if !unicode.IsSpace(c) {
  162. return self.read(p, c)
  163. }
  164. }
  165. }
  166. func (self *Parser) grab(tk _Token, repo Repository) (*Expr, error) {
  167. if repo == nil {
  168. return nil, newSyntaxError(tk.pos, "unresolved symbol: " + tk.str())
  169. } else if term, err := repo.Get(tk.str()); err != nil {
  170. return nil, err
  171. } else {
  172. return Ref(term), nil
  173. }
  174. }
  175. func (self *Parser) nest(nest int, repo Repository) (*Expr, error) {
  176. var err error
  177. var ret *Expr
  178. var ntk _Token
  179. /* evaluate the nested expression */
  180. if ret, err = self.expr(0, nest + 1, repo); err != nil {
  181. return nil, err
  182. }
  183. /* must follows with a ')' */
  184. if ntk, err = self.next(); err != nil {
  185. return nil, err
  186. } else if ntk.tag != _T_punc || ntk.u64 != ')' {
  187. return nil, newSyntaxError(ntk.pos, "')' expected")
  188. } else {
  189. return ret, nil
  190. }
  191. }
  192. func (self *Parser) unit(nest int, repo Repository) (*Expr, error) {
  193. if tk, err := self.next(); err != nil {
  194. return nil, err
  195. } else if tk.tag == _T_int {
  196. return Int(int64(tk.u64)), nil
  197. } else if tk.tag == _T_name {
  198. return self.grab(tk, repo)
  199. } else if tk.tag == _T_punc && tk.u64 == '(' {
  200. return self.nest(nest, repo)
  201. } else if tk.tag == _T_punc && tk.u64 == '+' {
  202. return self.unit(nest, repo)
  203. } else if tk.tag == _T_punc && tk.u64 == '-' {
  204. return neg2(self.unit(nest, repo))
  205. } else if tk.tag == _T_punc && tk.u64 == '~' {
  206. return not2(self.unit(nest, repo))
  207. } else {
  208. return nil, newSyntaxError(tk.pos, "integer, unary operator or nested expression expected")
  209. }
  210. }
  211. func (self *Parser) term(prec int, nest int, repo Repository) (*Expr, error) {
  212. var err error
  213. var val *Expr
  214. /* parse the LHS operand */
  215. if val, err = self.expr(prec + 1, nest, repo); err != nil {
  216. return nil, err
  217. }
  218. /* parse all the operators of the same precedence */
  219. for {
  220. var op int
  221. var rv *Expr
  222. var tk _Token
  223. /* peek the next token */
  224. pp := self.pos
  225. tk, err = self.next()
  226. /* check for errors */
  227. if err != nil {
  228. return nil, err
  229. }
  230. /* encountered EOF */
  231. if tk.tag == _T_end {
  232. return val, nil
  233. }
  234. /* must be an operator */
  235. if tk.tag != _T_punc {
  236. return nil, newSyntaxError(tk.pos, "operators expected")
  237. }
  238. /* check for the operator precedence */
  239. if op = int(tk.u64); !precedence[prec][op] {
  240. self.pos = pp
  241. return val, nil
  242. }
  243. /* evaluate the RHS operand, and combine the value */
  244. if rv, err = self.expr(prec + 1, nest, repo); err != nil {
  245. return nil, err
  246. } else {
  247. val = binaryOps[op](val, rv)
  248. }
  249. }
  250. }
  251. func (self *Parser) expr(prec int, nest int, repo Repository) (*Expr, error) {
  252. if prec >= len(precedence) {
  253. return self.unit(nest, repo)
  254. } else {
  255. return self.term(prec, nest, repo)
  256. }
  257. }
  258. // Parse parses the expression, and returns it's AST tree.
  259. func (self *Parser) Parse(repo Repository) (*Expr, error) {
  260. return self.expr(0, 0, repo)
  261. }
  262. // SetSource resets the expression parser and sets the expression source.
  263. func (self *Parser) SetSource(src string) *Parser {
  264. self.pos = 0
  265. self.src = []rune(src)
  266. return self
  267. }