assembler.go 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803
  1. package x86_64
  2. import (
  3. `bytes`
  4. `errors`
  5. `fmt`
  6. `math`
  7. `strconv`
  8. `strings`
  9. `unicode`
  10. `github.com/chenzhuoyu/iasm/expr`
  11. )
  // _TokenKind identifies the lexical category of a _Token.
  type _TokenKind int

  // _Punctuation identifies a punctuation mark or operator produced by the tokenizer.
  type _Punctuation int

  // Token kinds. Numbering starts at 1, so the zero value is not one of them.
  const (
      _T_end _TokenKind = iota + 1
      _T_int
      _T_name
      _T_punc
      _T_space
  )

  // Punctuation marks. Numbering starts at 1, so the zero value is not one of them.
  const (
      _P_plus _Punctuation = iota + 1
      _P_minus
      _P_star
      _P_slash
      _P_percent
      _P_amp
      _P_bar
      _P_caret
      _P_shl
      _P_shr
      _P_tilde
      _P_lbrk
      _P_rbrk
      _P_dot
      _P_comma
      _P_colon
      _P_dollar
      _P_hash
  )

  // _PUNC_NAME gives the source spelling of every punctuation constant.
  var _PUNC_NAME = map[_Punctuation]string {
      _P_plus    : "+",
      _P_minus   : "-",
      _P_star    : "*",
      _P_slash   : "/",
      _P_percent : "%",
      _P_amp     : "&",
      _P_bar     : "|",
      _P_caret   : "^",
      _P_shl     : "<<",
      _P_shr     : ">>",
      _P_tilde   : "~",
      _P_lbrk    : "(",
      _P_rbrk    : ")",
      _P_dot     : ".",
      _P_comma   : ",",
      _P_colon   : ":",
      _P_dollar  : "$",
      _P_hash    : "#",
  }

  // String returns the source spelling of the punctuation, or a
  // "_Punctuation(N)" placeholder for values missing from _PUNC_NAME.
  func (self _Punctuation) String() string {
      name, known := _PUNC_NAME[self]

      /* unknown values are rendered numerically */
      if !known {
          return fmt.Sprintf("_Punctuation(%d)", self)
      }

      return name
  }
  70. type _Token struct {
  71. pos int
  72. end int
  73. u64 uint64
  74. str string
  75. tag _TokenKind
  76. }
  77. func (self *_Token) punc() _Punctuation {
  78. return _Punctuation(self.u64)
  79. }
  80. func (self *_Token) String() string {
  81. switch self.tag {
  82. case _T_end : return "<END>"
  83. case _T_int : return fmt.Sprintf("<INT %d>", self.u64)
  84. case _T_punc : return fmt.Sprintf("<PUNC %s>", _Punctuation(self.u64))
  85. case _T_name : return fmt.Sprintf("<NAME %s>", strconv.QuoteToASCII(self.str))
  86. case _T_space : return "<SPACE>"
  87. default : return fmt.Sprintf("<UNK:%d %d %s>", self.tag, self.u64, strconv.QuoteToASCII(self.str))
  88. }
  89. }
  90. func tokenEnd(p int, end int) _Token {
  91. return _Token {
  92. pos: p,
  93. end: end,
  94. tag: _T_end,
  95. }
  96. }
  97. func tokenInt(p int, val uint64) _Token {
  98. return _Token {
  99. pos: p,
  100. u64: val,
  101. tag: _T_int,
  102. }
  103. }
  104. func tokenName(p int, name string) _Token {
  105. return _Token {
  106. pos: p,
  107. str: name,
  108. tag: _T_name,
  109. }
  110. }
  111. func tokenPunc(p int, punc _Punctuation) _Token {
  112. return _Token {
  113. pos: p,
  114. tag: _T_punc,
  115. u64: uint64(punc),
  116. }
  117. }
  118. func tokenSpace(p int, end int) _Token {
  119. return _Token {
  120. pos: p,
  121. end: end,
  122. tag: _T_space,
  123. }
  124. }
  // SyntaxError represents an error in the assembly syntax.
  type SyntaxError struct {
      Pos    int
      Row    int
      Src    []rune
      Reason string
  }

  // Error implements the error interface. A negative Pos yields a
  // line-only message, otherwise the column is reported 1-based.
  func (self *SyntaxError) Error() string {
      if self.Pos < 0 {
          return fmt.Sprintf("%s at line %d", self.Reason, self.Row)
      }
      return fmt.Sprintf("%s at %d:%d", self.Reason, self.Row, self.Pos + 1)
  }

  // _Tokenizer scans a single source line, held as runes, from pos onwards.
  type _Tokenizer struct {
      pos int
      row int
      src []rune
  }

  // ch returns the rune at the current position without consuming it.
  // The caller must make sure the tokenizer is not at EOF.
  func (self *_Tokenizer) ch() rune {
      return self.src[self.pos]
  }

  // eof reports whether every rune of the source has been consumed.
  func (self *_Tokenizer) eof() bool {
      return self.pos >= len(self.src)
  }

  // rch returns the rune at the current position and advances past it.
  // The caller must make sure the tokenizer is not at EOF.
  func (self *_Tokenizer) rch() (ret rune) {
      ret = self.src[self.pos]
      self.pos++
      return ret
  }

  // err builds a SyntaxError for position pos on the current row.
  func (self *_Tokenizer) err(pos int, msg string) *SyntaxError {
      return &SyntaxError {
          Pos    : pos,
          Row    : self.row,
          Src    : self.src,
          Reason : msg,
      }
  }

  // _TrimState is the state of the comment-stripping scanner used by init.
  type _TrimState int

  const (
      _TS_normal _TrimState = iota    // outside of strings and comment markers
      _TS_slcomm                      // seen one '/', waiting for a second one
      _TS_hscomm                      // seen one '#', waiting for a second one
      _TS_string                      // inside a double-quoted string
      _TS_escape                      // right after a '\' inside a string
      _TS_accept                      // stopped on ';', cut at the current rune
      _TS_nolast                      // stopped on the 2nd rune of "//" or "##", cut one rune earlier
  )

  // init loads src into the tokenizer, rewinds the position and strips a
  // trailing comment introduced by "//", "##" or ";". Comment markers inside
  // double-quoted strings are ignored. The row counter is left untouched.
  //
  // It panics with a *SyntaxError if a string or an escape sequence is still
  // open at the end of the line.
  func (self *_Tokenizer) init(src string) {
      var i int
      var ch rune
      st := _TS_normal

      /* set the source */
      self.pos = 0
      self.src = []rune(src)

      /* remove comments, including "//", "##" and ";" */
      scan: for i, ch = range self.src {
          /* resolve a pending '/' or '#' first: if it is not doubled it was plain
           * punctuation, and the current rune must still be examined as normal input
           * (it may open a string or start a comment on its own) */
          switch st {
              case _TS_slcomm:
                  if ch == '/' {
                      st = _TS_nolast
                      break scan
                  }
                  st = _TS_normal
              case _TS_hscomm:
                  if ch == '#' {
                      st = _TS_nolast
                      break scan
                  }
                  st = _TS_normal
          }

          /* regular state transitions */
          switch st {
              case _TS_normal:
                  switch ch {
                      case '/' : st = _TS_slcomm
                      case '#' : st = _TS_hscomm
                      case '"' : st = _TS_string
                      case ';' : st = _TS_accept; break scan
                  }
              case _TS_string:
                  switch ch {
                      case '"'  : st = _TS_normal
                      case '\\' : st = _TS_escape
                  }
              case _TS_escape:
                  st = _TS_string
          }
      }

      /* cut the comment off, or report an unterminated string */
      switch st {
          case _TS_accept: self.src = self.src[:i]
          case _TS_nolast: self.src = self.src[:i - 1]
          case _TS_string: panic(self.err(i, "string is not terminated"))
          case _TS_escape: panic(self.err(i, "escape sequence is not terminated"))
      }
  }
  204. func (self *_Tokenizer) skip(check func(v rune) bool) {
  205. for !self.eof() && check(self.ch()) {
  206. self.pos++
  207. }
  208. }
  209. func (self *_Tokenizer) find(pos int, check func(v rune) bool) string {
  210. self.skip(check)
  211. return string(self.src[pos:self.pos])
  212. }
  213. func (self *_Tokenizer) chrv(p int) _Token {
  214. var err error
  215. var val uint64
  216. /* starting and ending position */
  217. p0 := p + 1
  218. p1 := p0 + 1
  219. /* find the end of the literal */
  220. for p1 < len(self.src) && self.src[p1] != '\'' {
  221. if p1++; self.src[p1 - 1] == '\\' {
  222. p1++
  223. }
  224. }
  225. /* empty literal */
  226. if p1 == p0 {
  227. panic(self.err(p1, "empty character constant"))
  228. }
  229. /* check for EOF */
  230. if p1 == len(self.src) {
  231. panic(self.err(p1, "unexpected EOF when scanning literals"))
  232. }
  233. /* parse the literal */
  234. if val, err = literal64(string(self.src[p0:p1])); err != nil {
  235. panic(self.err(p0, "cannot parse literal: " + err.Error()))
  236. }
  237. /* skip the closing '\'' */
  238. self.pos = p1 + 1
  239. return tokenInt(p, val)
  240. }
  241. func (self *_Tokenizer) numv(p int) _Token {
  242. if val, err := strconv.ParseUint(self.find(p, isnumber), 0, 64); err != nil {
  243. panic(self.err(p, "invalid immediate value: " + err.Error()))
  244. } else {
  245. return tokenInt(p, val)
  246. }
  247. }
  248. func (self *_Tokenizer) defv(p int, cc rune) _Token {
  249. if isdigit(cc) {
  250. return self.numv(p)
  251. } else if isident0(cc) {
  252. return tokenName(p, self.find(p, isident))
  253. } else {
  254. panic(self.err(p, "invalid char: " + strconv.QuoteRune(cc)))
  255. }
  256. }
  257. func (self *_Tokenizer) rep2(p int, pp _Punctuation, cc rune) _Token {
  258. if self.eof() {
  259. panic(self.err(self.pos, "unexpected EOF when scanning operators"))
  260. } else if c := self.rch(); c != cc {
  261. panic(self.err(p + 1, strconv.QuoteRune(cc) + " expected, got " + strconv.QuoteRune(c)))
  262. } else {
  263. return tokenPunc(p, pp)
  264. }
  265. }
  266. func (self *_Tokenizer) read() _Token {
  267. var p int
  268. var c rune
  269. var t _Token
  270. /* check for EOF */
  271. if self.eof() {
  272. return tokenEnd(self.pos, self.pos)
  273. }
  274. /* skip spaces as needed */
  275. if p = self.pos; unicode.IsSpace(self.src[p]) {
  276. self.skip(unicode.IsSpace)
  277. return tokenSpace(p, self.pos)
  278. }
  279. /* check for line comments */
  280. if p = self.pos; p < len(self.src) - 1 && self.src[p] == '/' && self.src[p + 1] == '/' {
  281. self.pos = len(self.src)
  282. return tokenEnd(p, self.pos)
  283. }
  284. /* read the next character */
  285. p = self.pos
  286. c = self.rch()
  287. /* parse the next character */
  288. switch c {
  289. case '+' : t = tokenPunc(p, _P_plus)
  290. case '-' : t = tokenPunc(p, _P_minus)
  291. case '*' : t = tokenPunc(p, _P_star)
  292. case '/' : t = tokenPunc(p, _P_slash)
  293. case '%' : t = tokenPunc(p, _P_percent)
  294. case '&' : t = tokenPunc(p, _P_amp)
  295. case '|' : t = tokenPunc(p, _P_bar)
  296. case '^' : t = tokenPunc(p, _P_caret)
  297. case '<' : t = self.rep2(p, _P_shl, '<')
  298. case '>' : t = self.rep2(p, _P_shr, '>')
  299. case '~' : t = tokenPunc(p, _P_tilde)
  300. case '(' : t = tokenPunc(p, _P_lbrk)
  301. case ')' : t = tokenPunc(p, _P_rbrk)
  302. case '.' : t = tokenPunc(p, _P_dot)
  303. case ',' : t = tokenPunc(p, _P_comma)
  304. case ':' : t = tokenPunc(p, _P_colon)
  305. case '$' : t = tokenPunc(p, _P_dollar)
  306. case '#' : t = tokenPunc(p, _P_hash)
  307. case '\'' : t = self.chrv(p)
  308. default : t = self.defv(p, c)
  309. }
  310. /* mark the end of token */
  311. t.end = self.pos
  312. return t
  313. }
  314. func (self *_Tokenizer) next() (tk _Token) {
  315. for {
  316. if tk = self.read(); tk.tag != _T_space {
  317. return
  318. }
  319. }
  320. }
  // OperandKind indicates the type of the operand.
  type OperandKind int

  // Operand kinds, each one occupies its own bit.
  const (
      // OpImm means the operand is an immediate value.
      OpImm OperandKind = 1 << iota

      // OpReg means the operand is a register.
      OpReg

      // OpMem means the operand is a memory address.
      OpMem

      // OpLabel means the operand is a label, specifically for
      // branch instructions.
      OpLabel
  )

  // LabelKind indicates the type of label reference.
  type LabelKind int

  // Label kinds, numbered from 1.
  const (
      // Declaration means the label is a declaration.
      Declaration LabelKind = iota + 1

      // BranchTarget means the label should be treated as a branch target.
      BranchTarget

      // RelativeAddress means the label should be treated as a reference to
      // the code section (e.g. RIP-relative addressing).
      RelativeAddress
  )

  // InstructionPrefix indicates the prefix bytes prepended to the instruction.
  type InstructionPrefix byte

  // Instruction prefixes, numbered from 0.
  const (
      // PrefixLock causes the processor's LOCK# signal to be asserted during execution of
      // the accompanying instruction (turns the instruction into an atomic instruction).
      // In a multiprocessor environment, the LOCK# signal ensures that the processor
      // has exclusive use of any shared memory while the signal is asserted.
      PrefixLock InstructionPrefix = iota

      // PrefixSegmentCS overrides the memory operation of this instruction to CS (Code Segment).
      PrefixSegmentCS

      // PrefixSegmentDS overrides the memory operation of this instruction to DS (Data Segment),
      // this is the default section for most instructions if not specified.
      PrefixSegmentDS

      // PrefixSegmentES overrides the memory operation of this instruction to ES (Extra Segment).
      PrefixSegmentES

      // PrefixSegmentFS overrides the memory operation of this instruction to FS.
      PrefixSegmentFS

      // PrefixSegmentGS overrides the memory operation of this instruction to GS.
      PrefixSegmentGS

      // PrefixSegmentSS overrides the memory operation of this instruction to SS (Stack Segment).
      PrefixSegmentSS
  )
  367. // ParsedLabel represents a label in the source, either a jump target or
  368. // an RIP-relative addressing.
  369. type ParsedLabel struct {
  370. Name string
  371. Kind LabelKind
  372. }
  373. // ParsedOperand represents an operand of an instruction in the source.
  374. type ParsedOperand struct {
  375. Op OperandKind
  376. Imm int64
  377. Reg Register
  378. Label ParsedLabel
  379. Memory MemoryAddress
  380. }
  381. // ParsedInstruction represents an instruction in the source.
  382. type ParsedInstruction struct {
  383. Mnemonic string
  384. Operands []ParsedOperand
  385. Prefixes []InstructionPrefix
  386. }
  387. func (self *ParsedInstruction) imm(v int64) {
  388. self.Operands = append(self.Operands, ParsedOperand {
  389. Op : OpImm,
  390. Imm : v,
  391. })
  392. }
  393. func (self *ParsedInstruction) reg(v Register) {
  394. self.Operands = append(self.Operands, ParsedOperand {
  395. Op : OpReg,
  396. Reg : v,
  397. })
  398. }
  399. func (self *ParsedInstruction) mem(v MemoryAddress) {
  400. self.Operands = append(self.Operands, ParsedOperand {
  401. Op : OpMem,
  402. Memory : v,
  403. })
  404. }
  405. func (self *ParsedInstruction) target(v string) {
  406. self.Operands = append(self.Operands, ParsedOperand {
  407. Op : OpLabel,
  408. Label : ParsedLabel {
  409. Name: v,
  410. Kind: BranchTarget,
  411. },
  412. })
  413. }
  414. func (self *ParsedInstruction) reference(v string) {
  415. self.Operands = append(self.Operands, ParsedOperand {
  416. Op : OpLabel,
  417. Label : ParsedLabel {
  418. Name: v,
  419. Kind: RelativeAddress,
  420. },
  421. })
  422. }
  423. // LineKind indicates the type of ParsedLine.
  424. type LineKind int
  425. const (
  426. // LineLabel means the ParsedLine is a label.
  427. LineLabel LineKind = iota + 1
  428. // LineInstr means the ParsedLine is an instruction.
  429. LineInstr
  430. // LineCommand means the ParsedLine is a ParsedCommand.
  431. LineCommand
  432. )
  433. // ParsedLine represents a parsed source line.
  434. type ParsedLine struct {
  435. Row int
  436. Src []rune
  437. Kind LineKind
  438. Label ParsedLabel
  439. Command ParsedCommand
  440. Instruction ParsedInstruction
  441. }
  442. // ParsedCommand represents a parsed assembly directive command.
  443. type ParsedCommand struct {
  444. Cmd string
  445. Args []ParsedCommandArg
  446. }
  447. // ParsedCommandArg represents an argument of a ParsedCommand.
  448. type ParsedCommandArg struct {
  449. Value string
  450. IsString bool
  451. }
  452. // Parser parses the source, and generates a sequence of ParsedInstruction's.
  453. type Parser struct {
  454. lex _Tokenizer
  455. exp expr.Parser
  456. }
  457. const (
  458. rip Register64 = 0xff
  459. )
  460. var _RegBranch = map[string]bool {
  461. "jmp" : true,
  462. "jmpq" : true,
  463. "call" : true,
  464. "callq" : true,
  465. }
  466. var _SegPrefix = map[string]InstructionPrefix {
  467. "cs": PrefixSegmentCS,
  468. "ds": PrefixSegmentDS,
  469. "es": PrefixSegmentES,
  470. "fs": PrefixSegmentFS,
  471. "gs": PrefixSegmentGS,
  472. "ss": PrefixSegmentSS,
  473. }
  474. func (self *Parser) i32(tk _Token, v int64) int32 {
  475. if v >= math.MinInt32 && v <= math.MaxUint32 {
  476. return int32(v)
  477. } else {
  478. panic(self.err(tk.pos, fmt.Sprintf("32-bit integer out ouf range: %d", v)))
  479. }
  480. }
  481. func (self *Parser) err(pos int, msg string) *SyntaxError {
  482. return &SyntaxError {
  483. Pos : pos,
  484. Row : self.lex.row,
  485. Src : self.lex.src,
  486. Reason : msg,
  487. }
  488. }
  489. func (self *Parser) negv() int64 {
  490. tk := self.lex.read()
  491. tt := tk.tag
  492. /* must be an integer */
  493. if tt != _T_int {
  494. panic(self.err(tk.pos, "integer expected after '-'"))
  495. } else {
  496. return -int64(tk.u64)
  497. }
  498. }
  499. func (self *Parser) eval(p int) (r int64) {
  500. var e error
  501. var v *expr.Expr
  502. /* searching start */
  503. n := 1
  504. q := p + 1
  505. /* find the end of expression */
  506. for n > 0 && q < len(self.lex.src) {
  507. switch self.lex.src[q] {
  508. case '(' : q++; n++
  509. case ')' : q++; n--
  510. default : q++
  511. }
  512. }
  513. /* check for EOF */
  514. if n != 0 {
  515. panic(self.err(q, "unexpected EOF when parsing expressions"))
  516. }
  517. /* evaluate the expression */
  518. if v, e = self.exp.SetSource(string(self.lex.src[p:q - 1])).Parse(nil); e != nil {
  519. panic(self.err(p, "cannot evaluate expression: " + e.Error()))
  520. }
  521. /* evaluate the expression */
  522. if r, e = v.Evaluate(); e != nil {
  523. panic(self.err(p, "cannot evaluate expression: " + e.Error()))
  524. }
  525. /* skip the last ')' */
  526. v.Free()
  527. self.lex.pos = q
  528. return
  529. }
  530. func (self *Parser) relx(tk _Token) {
  531. if tk.tag != _T_punc || tk.punc() != _P_lbrk {
  532. panic(self.err(tk.pos, "'(' expected for RIP-relative addressing"))
  533. } else if tk = self.lex.next(); self.regx(tk) != rip {
  534. panic(self.err(tk.pos, "RIP-relative addressing expects %rip as the base register"))
  535. } else if tk = self.lex.next(); tk.tag != _T_punc || tk.punc() != _P_rbrk {
  536. panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
  537. }
  538. }
  539. func (self *Parser) immx(tk _Token) int64 {
  540. if tk.tag != _T_punc || tk.punc() != _P_dollar {
  541. panic(self.err(tk.pos, "'$' expected for registers"))
  542. } else if tk = self.lex.read(); tk.tag == _T_int {
  543. return int64(tk.u64)
  544. } else if tk.tag == _T_punc && tk.punc() == _P_lbrk {
  545. return self.eval(self.lex.pos)
  546. } else if tk.tag == _T_punc && tk.punc() == _P_minus {
  547. return self.negv()
  548. } else {
  549. panic(self.err(tk.pos, "immediate value expected"))
  550. }
  551. }
  552. func (self *Parser) regx(tk _Token) Register {
  553. if tk.tag != _T_punc || tk.punc() != _P_percent {
  554. panic(self.err(tk.pos, "'%' expected for registers"))
  555. } else if tk = self.lex.read(); tk.tag != _T_name {
  556. panic(self.err(tk.pos, "register name expected"))
  557. } else if tk.str == "rip" {
  558. return rip
  559. } else if reg, ok := Registers[tk.str]; ok {
  560. return reg
  561. } else {
  562. panic(self.err(tk.pos, "invalid register name: " + strconv.Quote(tk.str)))
  563. }
  564. }
  565. func (self *Parser) regv(tk _Token) Register {
  566. if reg := self.regx(tk); reg == rip {
  567. panic(self.err(tk.pos, "%rip is not accessable as a dedicated register"))
  568. } else {
  569. return reg
  570. }
  571. }
  572. func (self *Parser) disp(vv int32) MemoryAddress {
  573. switch tk := self.lex.next(); tk.tag {
  574. case _T_end : return MemoryAddress { Displacement: vv }
  575. case _T_punc : return self.relm(tk, vv)
  576. default : panic(self.err(tk.pos, "',' or '(' expected"))
  577. }
  578. }
  579. func (self *Parser) relm(tv _Token, disp int32) MemoryAddress {
  580. var tk _Token
  581. var tt _TokenKind
  582. /* check for absolute addressing */
  583. if tv.punc() == _P_comma {
  584. self.lex.pos--
  585. return MemoryAddress { Displacement: disp }
  586. }
  587. /* must be '(' now */
  588. if tv.punc() != _P_lbrk {
  589. panic(self.err(tv.pos, "',' or '(' expected"))
  590. }
  591. /* read the next token */
  592. tk = self.lex.next()
  593. tt = tk.tag
  594. /* must be a punctuation */
  595. if tt != _T_punc {
  596. panic(self.err(tk.pos, "'%' or ',' expected"))
  597. }
  598. /* check for base */
  599. switch tk.punc() {
  600. case _P_percent : return self.base(tk, disp)
  601. case _P_comma : return self.index(nil, disp)
  602. default : panic(self.err(tk.pos, "'%' or ',' expected"))
  603. }
  604. }
  605. func (self *Parser) base(tk _Token, disp int32) MemoryAddress {
  606. rr := self.regx(tk)
  607. nk := self.lex.next()
  608. /* check for register indirection or base-index addressing */
  609. if !isReg64(rr) {
  610. panic(self.err(tk.pos, "not a valid base register"))
  611. } else if nk.tag != _T_punc {
  612. panic(self.err(nk.pos, "',' or ')' expected"))
  613. } else if nk.punc() == _P_comma {
  614. return self.index(rr, disp)
  615. } else if nk.punc() == _P_rbrk {
  616. return MemoryAddress { Base: rr, Displacement: disp }
  617. } else {
  618. panic(self.err(nk.pos, "',' or ')' expected"))
  619. }
  620. }
  621. func (self *Parser) index(base Register, disp int32) MemoryAddress {
  622. tk := self.lex.next()
  623. rr := self.regx(tk)
  624. nk := self.lex.next()
  625. /* check for scaled indexing */
  626. if base == rip {
  627. panic(self.err(tk.pos, "RIP-relative addressing does not support indexing or scaling"))
  628. } else if !isIndexable(rr) {
  629. panic(self.err(tk.pos, "not a valid index register"))
  630. } else if nk.tag != _T_punc {
  631. panic(self.err(nk.pos, "',' or ')' expected"))
  632. } else if nk.punc() == _P_comma {
  633. return self.scale(base, rr, disp)
  634. } else if nk.punc() == _P_rbrk {
  635. return MemoryAddress { Base: base, Index: rr, Scale: 1, Displacement: disp }
  636. } else {
  637. panic(self.err(nk.pos, "',' or ')' expected"))
  638. }
  639. }
  640. func (self *Parser) scale(base Register, index Register, disp int32) MemoryAddress {
  641. tk := self.lex.next()
  642. tt := tk.tag
  643. tv := tk.u64
  644. /* must be an integer */
  645. if tt != _T_int {
  646. panic(self.err(tk.pos, "integer expected"))
  647. }
  648. /* scale can only be 1, 2, 4 or 8 */
  649. if tv == 0 || (_Scales & (1 << tv)) == 0 {
  650. panic(self.err(tk.pos, "scale can only be 1, 2, 4 or 8"))
  651. }
  652. /* read next token */
  653. tk = self.lex.next()
  654. tt = tk.tag
  655. /* check for the closing ')' */
  656. if tt != _T_punc || tk.punc() != _P_rbrk {
  657. panic(self.err(tk.pos, "')' expected"))
  658. }
  659. /* construct the memory address */
  660. return MemoryAddress {
  661. Base : base,
  662. Index : index,
  663. Scale : uint8(tv),
  664. Displacement : disp,
  665. }
  666. }
  667. func (self *Parser) cmds() *ParsedLine {
  668. cmd := ""
  669. pos := self.lex.pos
  670. buf := []ParsedCommandArg(nil)
  671. /* find the end of command */
  672. for p := pos; pos < len(self.lex.src); pos++ {
  673. if unicode.IsSpace(self.lex.src[pos]) {
  674. cmd = string(self.lex.src[p:pos])
  675. break
  676. }
  677. }
  678. /* parse the arguments */
  679. loop: for {
  680. switch self.next(&pos) {
  681. case 0 : break loop
  682. case '#' : break loop
  683. case '"' : pos = self.strings(&buf, pos)
  684. default : pos = self.expressions(&buf, pos)
  685. }
  686. }
  687. /* construct the line */
  688. return &ParsedLine {
  689. Row : self.lex.row,
  690. Src : self.lex.src,
  691. Kind : LineCommand,
  692. Command : ParsedCommand {
  693. Cmd : cmd,
  694. Args : buf,
  695. },
  696. }
  697. }
  698. func (self *Parser) feed(line string) *ParsedLine {
  699. ff := true
  700. rr := false
  701. lk := false
  702. /* reset the lexer */
  703. self.lex.row++
  704. self.lex.init(line)
  705. /* parse the first token */
  706. tk := self.lex.next()
  707. tt := tk.tag
  708. /* it is a directive if it starts with a dot */
  709. if tk.tag == _T_punc && tk.punc() == _P_dot {
  710. return self.cmds()
  711. }
  712. /* otherwise it could be labels or instructions */
  713. if tt != _T_name {
  714. panic(self.err(tk.pos, "identifier expected"))
  715. }
  716. /* peek the next token */
  717. lex := self.lex
  718. tkx := lex.next()
  719. /* check for labels */
  720. if tkx.tag == _T_punc && tkx.punc() == _P_colon {
  721. tkx = lex.next()
  722. ttx := tkx.tag
  723. /* the line must end here */
  724. if ttx != _T_end {
  725. panic(self.err(tkx.pos, "garbage after label definition"))
  726. }
  727. /* construct the label */
  728. return &ParsedLine {
  729. Row : self.lex.row,
  730. Src : self.lex.src,
  731. Kind : LineLabel,
  732. Label : ParsedLabel {
  733. Kind: Declaration,
  734. Name: tk.str,
  735. },
  736. }
  737. }
  738. /* special case for the "lock" prefix */
  739. if tk.tag == _T_name && strings.ToLower(tk.str) == "lock" {
  740. lk = true
  741. tk = self.lex.next()
  742. /* must be an instruction */
  743. if tk.tag != _T_name {
  744. panic(self.err(tk.pos, "identifier expected"))
  745. }
  746. }
  747. /* set the line kind and mnemonic */
  748. ret := &ParsedLine {
  749. Row : self.lex.row,
  750. Src : self.lex.src,
  751. Kind : LineInstr,
  752. Instruction : ParsedInstruction { Mnemonic: strings.ToLower(tk.str) },
  753. }
  754. /* check for LOCK prefix */
  755. if lk {
  756. ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, PrefixLock)
  757. }
  758. /* parse all the operands */
  759. for {
  760. tk = self.lex.next()
  761. tt = tk.tag
  762. /* check for end of line */
  763. if tt == _T_end {
  764. break
  765. }
  766. /* expect a comma if not the first operand */
  767. if !ff {
  768. if tt == _T_punc && tk.punc() == _P_comma {
  769. tk = self.lex.next()
  770. } else {
  771. panic(self.err(tk.pos, "',' expected"))
  772. }
  773. }
  774. /* not the first operand anymore */
  775. ff = false
  776. tt = tk.tag
  777. /* encountered an integer, must be a SIB memory address */
  778. if tt == _T_int {
  779. ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
  780. continue
  781. }
  782. /* encountered an identifier, maybe an expression or a jump target, or a segment override prefix */
  783. if tt == _T_name {
  784. ts := tk.str
  785. tp := self.lex.pos
  786. /* if the next token is EOF or a comma, it's a jumpt target */
  787. if tk = self.lex.next(); tk.tag == _T_end || (tk.tag == _T_punc && tk.punc() == _P_comma) {
  788. self.lex.pos = tp
  789. ret.Instruction.target(ts)
  790. continue
  791. }
  792. /* if it is a colon, it's a segment override prefix, otherwise it must be an RIP-relative addressing operand */
  793. if tk.tag != _T_punc || tk.punc() != _P_colon {
  794. self.relx(tk)
  795. ret.Instruction.reference(ts)
  796. continue
  797. }
  798. /* lookup segment prefixes */
  799. if p, ok := _SegPrefix[strings.ToLower(ts)]; !ok {
  800. panic(self.err(tk.pos, "invalid segment name"))
  801. } else {
  802. ret.Instruction.Prefixes = append(ret.Instruction.Prefixes, p)
  803. }
  804. /* read the next token */
  805. tk = self.lex.next()
  806. tt = tk.tag
  807. /* encountered an integer, must be a SIB memory address */
  808. if tt == _T_int {
  809. ret.Instruction.mem(self.disp(self.i32(tk, int64(tk.u64))))
  810. continue
  811. }
  812. }
  813. /* certain instructions may have a "*" before operands */
  814. if tt == _T_punc && tk.punc() == _P_star {
  815. tk = self.lex.next()
  816. tt = tk.tag
  817. rr = true
  818. }
  819. /* ... otherwise it must be a punctuation */
  820. if tt != _T_punc {
  821. panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
  822. }
  823. /* check the operator */
  824. switch tk.punc() {
  825. case _P_lbrk : break
  826. case _P_minus : ret.Instruction.mem(self.disp(self.i32(tk, self.negv()))) ; continue
  827. case _P_dollar : ret.Instruction.imm(self.immx(tk)) ; continue
  828. case _P_percent : ret.Instruction.reg(self.regv(tk)) ; continue
  829. default : panic(self.err(tk.pos, "'$', '%', '-' or '(' expected"))
  830. }
  831. /* special case of '(', might be either `(expr)(SIB)` or just `(SIB)`
  832. * read one more token to confirm */
  833. tk = self.lex.next()
  834. tt = tk.tag
  835. /* the next token is '%', it's a memory address,
  836. * or ',' if it's a memory address without base,
  837. * otherwise it must be in `(expr)(SIB)` form */
  838. if tk.tag == _T_punc && tk.punc() == _P_percent {
  839. ret.Instruction.mem(self.base(tk, 0))
  840. } else if tk.tag == _T_punc && tk.punc() == _P_comma {
  841. ret.Instruction.mem(self.index(nil, 0))
  842. } else {
  843. ret.Instruction.mem(self.disp(self.i32(tk, self.eval(tk.pos))))
  844. }
  845. }
  846. /* check "jmp" and "call" instructions */
  847. if !_RegBranch[ret.Instruction.Mnemonic] {
  848. return ret
  849. } else if len(ret.Instruction.Operands) != 1 {
  850. panic(self.err(tk.pos, fmt.Sprintf(`"%s" requires exact 1 argument`, ret.Instruction.Mnemonic)))
  851. } else if !rr && ret.Instruction.Operands[0].Op != OpReg && ret.Instruction.Operands[0].Op != OpLabel {
  852. panic(self.err(tk.pos, fmt.Sprintf(`invalid operand for "%s" instruction`, ret.Instruction.Mnemonic)))
  853. } else {
  854. return ret
  855. }
  856. }
  857. func (self *Parser) next(p *int) rune {
  858. for {
  859. if *p >= len(self.lex.src) {
  860. return 0
  861. } else if cc := self.lex.src[*p]; !unicode.IsSpace(cc) {
  862. return cc
  863. } else {
  864. *p++
  865. }
  866. }
  867. }
  868. func (self *Parser) delim(p int) int {
  869. if cc := self.next(&p); cc == 0 {
  870. return p
  871. } else if cc == ',' {
  872. return p + 1
  873. } else {
  874. panic(self.err(p, "',' expected"))
  875. }
  876. }
  877. func (self *Parser) strings(argv *[]ParsedCommandArg, p int) int {
  878. var i int
  879. var e error
  880. var v string
  881. /* find the end of string */
  882. for i = p + 1; i < len(self.lex.src) && self.lex.src[i] != '"'; i++ {
  883. if self.lex.src[i] == '\\' {
  884. i++
  885. }
  886. }
  887. /* check for EOF */
  888. if i == len(self.lex.src) {
  889. panic(self.err(i, "unexpected EOF when scanning strings"))
  890. }
  891. /* unquote the string */
  892. if v, e = strconv.Unquote(string(self.lex.src[p:i + 1])); e != nil {
  893. panic(self.err(p, "invalid string: " + e.Error()))
  894. }
  895. /* add the argument to buffer */
  896. *argv = append(*argv, ParsedCommandArg { Value: v, IsString: true })
  897. return self.delim(i + 1)
  898. }
  899. func (self *Parser) directives(line string) {
  900. self.lex.row++
  901. self.lex.init(line)
  902. /* parse the first token */
  903. tk := self.lex.next()
  904. tt := tk.tag
  905. /* check for EOF */
  906. if tt == _T_end {
  907. return
  908. }
  909. /* must be a directive */
  910. if tt != _T_punc || tk.punc() != _P_hash {
  911. panic(self.err(tk.pos, "'#' expected"))
  912. }
  913. /* parse the line number */
  914. tk = self.lex.next()
  915. tt = tk.tag
  916. /* must be a line number, if it is, set the row number, and ignore the rest of the line */
  917. if tt != _T_int {
  918. panic(self.err(tk.pos, "line number expected"))
  919. } else {
  920. self.lex.row = int(tk.u64) - 1
  921. }
  922. }
  923. func (self *Parser) expressions(argv *[]ParsedCommandArg, p int) int {
  924. var i int
  925. var n int
  926. var s int
  927. /* scan until the first standalone ',' or EOF */
  928. loop: for i = p; i < len(self.lex.src); i++ {
  929. switch self.lex.src[i] {
  930. case ',' : if s == 0 { if n == 0 { break loop } }
  931. case ']', '}', '>' : if s == 0 { if n == 0 { break loop } else { n-- } }
  932. case '[', '{', '<' : if s == 0 { n++ }
  933. case '\\' : if s != 0 { i++ }
  934. case '\'' : if s != 2 { s ^= 1 }
  935. case '"' : if s != 1 { s ^= 2 }
  936. }
  937. }
  938. /* check for EOF in strings */
  939. if s != 0 {
  940. panic(self.err(i, "unexpected EOF when scanning strings"))
  941. }
  942. /* check for bracket matching */
  943. if n != 0 {
  944. panic(self.err(i, "unbalanced '{' or '[' or '<'"))
  945. }
  946. /* add the argument to buffer */
  947. *argv = append(*argv, ParsedCommandArg { Value: string(self.lex.src[p:i]) })
  948. return self.delim(i)
  949. }
  950. // Feed feeds the parser with one more line, and the parser
  951. // parses it into a ParsedLine.
  952. //
  953. // NOTE: Feed does not handle empty lines or multiple lines,
  954. // it panics when this happens. Use Parse to parse multiple
  955. // lines of assembly source.
  956. //
  957. func (self *Parser) Feed(src string) (ret *ParsedLine, err error) {
  958. var ok bool
  959. var ss string
  960. var vv interface{}
  961. /* check for multiple lines */
  962. if strings.ContainsRune(src, '\n') {
  963. return nil, errors.New("passing multiple lines to Feed()")
  964. }
  965. /* check for blank lines */
  966. if ss = strings.TrimSpace(src); ss == "" || ss[0] == '#' || strings.HasPrefix(ss, "//") {
  967. return nil, errors.New("blank line or line with only comments or line-marks")
  968. }
  969. /* setup error handler */
  970. defer func() {
  971. if vv = recover(); vv != nil {
  972. if err, ok = vv.(*SyntaxError); !ok {
  973. panic(vv)
  974. }
  975. }
  976. }()
  977. /* call the actual parser */
  978. ret = self.feed(src)
  979. return
  980. }
  981. // Parse parses the entire assembly source (possibly multiple lines) into
  982. // a sequence of *ParsedLine.
  983. func (self *Parser) Parse(src string) (ret []*ParsedLine, err error) {
  984. var ok bool
  985. var ss string
  986. var vv interface{}
  987. /* setup error handler */
  988. defer func() {
  989. if vv = recover(); vv != nil {
  990. if err, ok = vv.(*SyntaxError); !ok {
  991. panic(vv)
  992. }
  993. }
  994. }()
  995. /* feed every line */
  996. for _, line := range strings.Split(src, "\n") {
  997. if ss = strings.TrimSpace(line); ss == "" || strings.HasPrefix(ss, "//") {
  998. self.lex.row++
  999. } else if ss[0] == '#' {
  1000. self.directives(line)
  1001. } else {
  1002. ret = append(ret, self.feed(line))
  1003. }
  1004. }
  1005. /* all done */
  1006. err = nil
  1007. return
  1008. }
  1009. // Directive handles the directive.
  1010. func (self *Parser) Directive(line string) (err error) {
  1011. var ok bool
  1012. var ss string
  1013. var vv interface{}
  1014. /* check for directives */
  1015. if ss = strings.TrimSpace(line); ss == "" || ss[0] != '#' {
  1016. return errors.New("not a directive")
  1017. }
  1018. /* setup error handler */
  1019. defer func() {
  1020. if vv = recover(); vv != nil {
  1021. if err, ok = vv.(*SyntaxError); !ok {
  1022. panic(vv)
  1023. }
  1024. }
  1025. }()
  1026. /* call the directive parser */
  1027. self.directives(line)
  1028. return
  1029. }
// _TermRepo is a name-to-term table. It stores both labels and terms
// registered through define, and its Get method is what the Assembler passes
// to the expression parser for name lookups. The map is created lazily, so
// the zero value is ready to use.
type _TermRepo struct {
	terms map[string]expr.Term
}
  1033. func (self *_TermRepo) Get(name string) (expr.Term, error) {
  1034. if ret, ok := self.terms[name]; ok {
  1035. return ret, nil
  1036. } else {
  1037. return nil, errors.New("undefined name: " + name)
  1038. }
  1039. }
  1040. func (self *_TermRepo) label(name string) (*Label, error) {
  1041. var ok bool
  1042. var lb *Label
  1043. var tr expr.Term
  1044. /* check for existing terms */
  1045. if tr, ok = self.terms[name]; ok {
  1046. if lb, ok = tr.(*Label); ok {
  1047. return lb, nil
  1048. } else {
  1049. return nil, errors.New("name is not a label: " + name)
  1050. }
  1051. }
  1052. /* create a new one as needed */
  1053. lb = new(Label)
  1054. lb.Name = name
  1055. /* create the map if needed */
  1056. if self.terms == nil {
  1057. self.terms = make(map[string]expr.Term, 1)
  1058. }
  1059. /* register the label */
  1060. self.terms[name] = lb
  1061. return lb, nil
  1062. }
  1063. func (self *_TermRepo) define(name string, term expr.Term) {
  1064. var ok bool
  1065. var tr expr.Term
  1066. /* create the map if needed */
  1067. if self.terms == nil {
  1068. self.terms = make(map[string]expr.Term, 1)
  1069. }
  1070. /* check for existing terms */
  1071. if tr, ok = self.terms[name]; !ok {
  1072. self.terms[name] = term
  1073. } else if _, ok = tr.(*Label); !ok {
  1074. self.terms[name] = term
  1075. } else {
  1076. panic("conflicting term types: " + name)
  1077. }
  1078. }
// _Command describes an assembler command.
//
// _Command.args is a descriptor string that encodes both the arity and the
// argument types of the command: every character other than '?' stands for
// one argument, and the character itself gives the argument type.
//
// Possible values are:
//
//	s    This argument should be a string.
//	e    This argument should be an expression.
//	?    The next argument is optional, and must be the last argument.
//
// _Command.handler is invoked with the assembler, the program being built
// and the parsed arguments once they have been validated against args.
type _Command struct {
	args    string
	handler func(*Assembler, *Program, []ParsedCommandArg) error
}
// Options controls the behavior of Assembler.
type Options struct {
	// InstructionAliasing specifies whether to enable instruction aliasing.
	// When set to true, the Assembler tries harder to find instructions: it
	// consults the alias table and retries the lookup with a trailing size
	// suffix ('b', 'w', 'l' or 'q') removed.
	InstructionAliasing bool
	// IgnoreUnknownDirectives specifies whether to report errors when unknown directives are encountered.
	// When set to true, all unknown directives are ignored silently, which is useful for parsing generated assembly.
	IgnoreUnknownDirectives bool
}
// Assembler assembles the entire assembly program and generates the corresponding
// machine code representations.
type Assembler struct {
	cc   int         // number of parsed lines processed so far; ".org" requires this to be 1
	ps   Parser      // source parser
	pc   uintptr     // origin, set by ".org" or WithBase
	buf  []byte      // machine code produced by the last successful Assemble
	main string      // entry point label name set by ".entry", empty if none
	opts Options     // assembler options
	repo _TermRepo   // labels and ".set" definitions
	expr expr.Parser // expression parser used for command arguments
	line *ParsedLine // line currently being assembled, used for error reporting
}
// asmCommands maps every supported command name to its argument descriptor
// (see _Command) and its handler. Note that "fill" and "space" share the
// same handler.
var asmCommands = map[string]_Command {
	"org" : { "e" , (*Assembler).assembleCommandOrg },
	"set" : { "ee" , (*Assembler).assembleCommandSet },
	"byte" : { "e" , (*Assembler).assembleCommandByte },
	"word" : { "e" , (*Assembler).assembleCommandWord },
	"long" : { "e" , (*Assembler).assembleCommandLong },
	"quad" : { "e" , (*Assembler).assembleCommandQuad },
	"fill" : { "e?e" , (*Assembler).assembleCommandFill },
	"space" : { "e?e" , (*Assembler).assembleCommandFill },
	"align" : { "e?e" , (*Assembler).assembleCommandAlign },
	"entry" : { "e" , (*Assembler).assembleCommandEntry },
	"ascii" : { "s" , (*Assembler).assembleCommandAscii },
	"asciz" : { "s" , (*Assembler).assembleCommandAsciz },
	"p2align" : { "e?e" , (*Assembler).assembleCommandP2Align },
}
  1132. func (self *Assembler) err(msg string) *SyntaxError {
  1133. return &SyntaxError {
  1134. Pos : -1,
  1135. Row : self.line.Row,
  1136. Src : self.line.Src,
  1137. Reason : msg,
  1138. }
  1139. }
  1140. func (self *Assembler) eval(expr string) (int64, error) {
  1141. if exp, err := self.expr.SetSource(expr).Parse(nil); err != nil {
  1142. return 0, err
  1143. } else {
  1144. return exp.Evaluate()
  1145. }
  1146. }
  1147. func (self *Assembler) checkArgs(i int, n int, v *ParsedCommand, isString bool) error {
  1148. if i >= len(v.Args) {
  1149. return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(v.Cmd), n))
  1150. } else if isString && !v.Args[i].IsString {
  1151. return self.err(fmt.Sprintf("argument %d of command %s must be a string", i + 1, strconv.Quote(v.Cmd)))
  1152. } else if !isString && v.Args[i].IsString {
  1153. return self.err(fmt.Sprintf("argument %d of command %s must be an expression", i + 1, strconv.Quote(v.Cmd)))
  1154. } else {
  1155. return nil
  1156. }
  1157. }
  1158. func (self *Assembler) assembleLabel(p *Program, lb *ParsedLabel) error {
  1159. if v, err := self.repo.label(lb.Name); err != nil {
  1160. return err
  1161. } else {
  1162. p.Link(v)
  1163. return nil
  1164. }
  1165. }
  1166. func (self *Assembler) assembleInstr(p *Program, line *ParsedInstruction) (err error) {
  1167. var ok bool
  1168. var pfx []byte
  1169. var ops []interface{}
  1170. var enc _InstructionEncoder
  1171. /* convert to lower-case */
  1172. opts := self.opts
  1173. name := strings.ToLower(line.Mnemonic)
  1174. /* fix register-addressing branches if needed */
  1175. if opts.InstructionAliasing && len(line.Operands) == 1 {
  1176. switch {
  1177. case name == "retq" : name = "ret"
  1178. case name == "movabsq" : name = "movq"
  1179. case name == "jmp" && line.Operands[0].Op != OpLabel : name = "jmpq"
  1180. case name == "jmpq" && line.Operands[0].Op == OpLabel : name = "jmp"
  1181. case name == "call" && line.Operands[0].Op != OpLabel : name = "callq"
  1182. case name == "callq" && line.Operands[0].Op == OpLabel : name = "call"
  1183. }
  1184. }
  1185. /* lookup from the alias table if needed */
  1186. if opts.InstructionAliasing {
  1187. enc, ok = _InstructionAliases[name]
  1188. }
  1189. /* lookup from the instruction table */
  1190. if !ok {
  1191. enc, ok = Instructions[name]
  1192. }
  1193. /* remove size suffix if possible */
  1194. if !ok && opts.InstructionAliasing {
  1195. switch i := len(name) - 1; name[i] {
  1196. case 'b', 'w', 'l', 'q': {
  1197. enc, ok = Instructions[name[:i]]
  1198. }
  1199. }
  1200. }
  1201. /* check for instruction name */
  1202. if !ok {
  1203. return self.err("no such instruction: " + strconv.Quote(name))
  1204. }
  1205. /* allocate memory for prefix if any */
  1206. if len(line.Prefixes) != 0 {
  1207. pfx = make([]byte, len(line.Prefixes))
  1208. }
  1209. /* convert the prefixes */
  1210. for i, v := range line.Prefixes {
  1211. switch v {
  1212. case PrefixLock : pfx[i] = _P_lock
  1213. case PrefixSegmentCS : pfx[i] = _P_cs
  1214. case PrefixSegmentDS : pfx[i] = _P_ds
  1215. case PrefixSegmentES : pfx[i] = _P_es
  1216. case PrefixSegmentFS : pfx[i] = _P_fs
  1217. case PrefixSegmentGS : pfx[i] = _P_gs
  1218. case PrefixSegmentSS : pfx[i] = _P_ss
  1219. default : panic("unreachable: invalid segment prefix")
  1220. }
  1221. }
  1222. /* convert the operands */
  1223. for _, op := range line.Operands {
  1224. switch op.Op {
  1225. case OpImm : ops = append(ops, op.Imm)
  1226. case OpReg : ops = append(ops, op.Reg)
  1227. case OpMem : self.assembleInstrMem(&ops, op.Memory)
  1228. case OpLabel : self.assembleInstrLabel(&ops, op.Label)
  1229. default : panic("parser yields an invalid operand kind")
  1230. }
  1231. }
  1232. /* catch any exceptions in the encoder */
  1233. defer func() {
  1234. if v := recover(); v != nil {
  1235. err = self.err(fmt.Sprint(v))
  1236. }
  1237. }()
  1238. /* encode the instruction */
  1239. enc(p, ops...).prefix = pfx
  1240. return nil
  1241. }
  1242. func (self *Assembler) assembleInstrMem(ops *[]interface{}, addr MemoryAddress) {
  1243. mem := new(MemoryOperand)
  1244. *ops = append(*ops, mem)
  1245. /* check for RIP-relative addressing */
  1246. if addr.Base != rip {
  1247. mem.Addr.Type = Memory
  1248. mem.Addr.Memory = addr
  1249. } else {
  1250. mem.Addr.Type = Offset
  1251. mem.Addr.Offset = RelativeOffset(addr.Displacement)
  1252. }
  1253. }
  1254. func (self *Assembler) assembleInstrLabel(ops *[]interface{}, label ParsedLabel) {
  1255. vk := label.Kind
  1256. tr, err := self.repo.label(label.Name)
  1257. /* check for errors */
  1258. if err != nil {
  1259. panic(err)
  1260. }
  1261. /* check for branch target */
  1262. if vk == BranchTarget {
  1263. *ops = append(*ops, tr)
  1264. return
  1265. }
  1266. /* add to ops */
  1267. *ops = append(*ops, &MemoryOperand {
  1268. Addr: Addressable {
  1269. Type : Reference,
  1270. Reference : tr,
  1271. },
  1272. })
  1273. }
  1274. func (self *Assembler) assembleCommand(p *Program, line *ParsedCommand) error {
  1275. var iv int
  1276. var cc rune
  1277. var ok bool
  1278. var va bool
  1279. var fn _Command
  1280. /* find the command */
  1281. if fn, ok = asmCommands[line.Cmd]; !ok {
  1282. if self.opts.IgnoreUnknownDirectives {
  1283. return nil
  1284. } else {
  1285. return self.err("no such command: " + strconv.Quote(line.Cmd))
  1286. }
  1287. }
  1288. /* expected & real argument count */
  1289. argx := len(fn.args)
  1290. argc := len(line.Args)
  1291. /* check the arguments */
  1292. loop: for iv, cc = range fn.args {
  1293. switch cc {
  1294. case '?' : va = true; break loop
  1295. case 's' : if err := self.checkArgs(iv, argx, line, true) ; err != nil { return err }
  1296. case 'e' : if err := self.checkArgs(iv, argx, line, false) ; err != nil { return err }
  1297. default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1298. }
  1299. }
  1300. /* simple case: non-variadic command */
  1301. if !va {
  1302. if argc == argx {
  1303. return fn.handler(self, p, line.Args)
  1304. } else {
  1305. return self.err(fmt.Sprintf("command %s takes exact %d arguments", strconv.Quote(line.Cmd), argx))
  1306. }
  1307. }
  1308. /* check for the descriptor */
  1309. if iv != argx - 2 {
  1310. panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1311. }
  1312. /* variadic command and the final optional argument is set */
  1313. if argc == argx - 1 {
  1314. switch fn.args[argx - 1] {
  1315. case 's' : if err := self.checkArgs(iv, -1, line, true) ; err != nil { return err }
  1316. case 'e' : if err := self.checkArgs(iv, -1, line, false) ; err != nil { return err }
  1317. default : panic("invalid argument descriptor: " + strconv.Quote(fn.args))
  1318. }
  1319. }
  1320. /* check argument count */
  1321. if argc == argx - 1 || argc == argx - 2 {
  1322. return fn.handler(self, p, line.Args)
  1323. } else {
  1324. return self.err(fmt.Sprintf("command %s takes %d or %d arguments", strconv.Quote(line.Cmd), argx - 2, argx - 1))
  1325. }
  1326. }
  1327. func (self *Assembler) assembleCommandInt(p *Program, argv []ParsedCommandArg, addfn func(*Program, *expr.Expr) *Instruction) error {
  1328. var err error
  1329. var val *expr.Expr
  1330. /* parse the expression */
  1331. if val, err = self.expr.SetSource(argv[0].Value).Parse(&self.repo); err != nil {
  1332. return err
  1333. }
  1334. /* add to the program */
  1335. addfn(p, val)
  1336. return nil
  1337. }
  1338. func (self *Assembler) assembleCommandOrg(_ *Program, argv []ParsedCommandArg) error {
  1339. var err error
  1340. var val int64
  1341. /* evaluate the expression */
  1342. if val, err = self.eval(argv[0].Value); err != nil {
  1343. return err
  1344. }
  1345. /* check for origin */
  1346. if val < 0 {
  1347. return self.err(fmt.Sprintf("negative origin: %d", val))
  1348. }
  1349. /* ".org" must be the first command if any */
  1350. if self.cc != 1 {
  1351. return self.err(".org must be the first command if present")
  1352. }
  1353. /* set the initial program counter */
  1354. self.pc = uintptr(val)
  1355. return nil
  1356. }
  1357. func (self *Assembler) assembleCommandSet(_ *Program, argv []ParsedCommandArg) error {
  1358. var err error
  1359. var val *expr.Expr
  1360. /* parse the expression */
  1361. if val, err = self.expr.SetSource(argv[1].Value).Parse(&self.repo); err != nil {
  1362. return err
  1363. }
  1364. /* define the new identifier */
  1365. self.repo.define(argv[0].Value, val)
  1366. return nil
  1367. }
// assembleCommandByte implements ".byte" by forwarding the argument to
// assembleCommandInt with (*Program).Byte as the emitter.
func (self *Assembler) assembleCommandByte(p *Program, argv []ParsedCommandArg) error {
	return self.assembleCommandInt(p, argv, (*Program).Byte)
}
// assembleCommandWord implements ".word" by forwarding the argument to
// assembleCommandInt with (*Program).Word as the emitter.
func (self *Assembler) assembleCommandWord(p *Program, argv []ParsedCommandArg) error {
	return self.assembleCommandInt(p, argv, (*Program).Word)
}
// assembleCommandLong implements ".long" by forwarding the argument to
// assembleCommandInt with (*Program).Long as the emitter.
func (self *Assembler) assembleCommandLong(p *Program, argv []ParsedCommandArg) error {
	return self.assembleCommandInt(p, argv, (*Program).Long)
}
// assembleCommandQuad implements ".quad" by forwarding the argument to
// assembleCommandInt with (*Program).Quad as the emitter.
func (self *Assembler) assembleCommandQuad(p *Program, argv []ParsedCommandArg) error {
	return self.assembleCommandInt(p, argv, (*Program).Quad)
}
  1380. func (self *Assembler) assembleCommandFill(p *Program, argv []ParsedCommandArg) error {
  1381. var fv byte
  1382. var nb int64
  1383. var ex error
  1384. /* evaluate the size */
  1385. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1386. return ex
  1387. }
  1388. /* check for filling size */
  1389. if nb < 0 {
  1390. return self.err(fmt.Sprintf("negative filling size: %d", nb))
  1391. }
  1392. /* check for optional filling value */
  1393. if len(argv) == 2 {
  1394. if val, err := self.eval(argv[1].Value); err != nil {
  1395. return err
  1396. } else if val < math.MinInt8 || val > math.MaxUint8 {
  1397. return self.err(fmt.Sprintf("value %d cannot be represented with a byte", val))
  1398. } else {
  1399. fv = byte(val)
  1400. }
  1401. }
  1402. /* fill with specified byte */
  1403. p.Data(bytes.Repeat([]byte { fv }, int(nb)))
  1404. return nil
  1405. }
  1406. func (self *Assembler) assembleCommandAlign(p *Program, argv []ParsedCommandArg) error {
  1407. var nb int64
  1408. var ex error
  1409. var fv *expr.Expr
  1410. /* evaluate the size */
  1411. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1412. return ex
  1413. }
  1414. /* check for alignment value */
  1415. if nb <= 0 {
  1416. return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1417. }
  1418. /* alignment must be a power of 2 */
  1419. if (nb & (nb - 1)) != 0 {
  1420. return self.err(fmt.Sprintf("alignment must be a power of 2: %d", nb))
  1421. }
  1422. /* check for optional filling value */
  1423. if len(argv) == 2 {
  1424. if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1425. fv = v
  1426. } else {
  1427. return err
  1428. }
  1429. }
  1430. /* fill with specified byte, default to 0 if not specified */
  1431. p.Align(uint64(nb), fv)
  1432. return nil
  1433. }
  1434. func (self *Assembler) assembleCommandEntry(_ *Program, argv []ParsedCommandArg) error {
  1435. name := argv[0].Value
  1436. rbuf := []rune(name)
  1437. /* check all the characters */
  1438. for i, cc := range rbuf {
  1439. if !isident0(cc) && (i == 0 || !isident(cc)) {
  1440. return self.err("entry point must be a label name")
  1441. }
  1442. }
  1443. /* set the main entry point */
  1444. self.main = name
  1445. return nil
  1446. }
// assembleCommandAscii implements ".ascii": it emits the bytes of the string
// argument as data, without a terminator.
func (self *Assembler) assembleCommandAscii(p *Program, argv []ParsedCommandArg) error {
	p.Data([]byte(argv[0].Value))
	return nil
}
  1451. func (self *Assembler) assembleCommandAsciz(p *Program, argv []ParsedCommandArg) error {
  1452. p.Data(append([]byte(argv[0].Value), 0))
  1453. return nil
  1454. }
  1455. func (self *Assembler) assembleCommandP2Align(p *Program, argv []ParsedCommandArg) error {
  1456. var nb int64
  1457. var ex error
  1458. var fv *expr.Expr
  1459. /* evaluate the size */
  1460. if nb, ex = self.eval(argv[0].Value); ex != nil {
  1461. return ex
  1462. }
  1463. /* check for alignment value */
  1464. if nb <= 0 {
  1465. return self.err(fmt.Sprintf("zero or negative alignment: %d", nb))
  1466. }
  1467. /* check for optional filling value */
  1468. if len(argv) == 2 {
  1469. if v, err := self.expr.SetSource(argv[1].Value).Parse(&self.repo); err == nil {
  1470. fv = v
  1471. } else {
  1472. return err
  1473. }
  1474. }
  1475. /* fill with specified byte, default to 0 if not specified */
  1476. p.Align(1 << nb, fv)
  1477. return nil
  1478. }
// Base returns the origin, which is the address set by WithBase or by an
// ".org" command and the one the program is assembled at.
func (self *Assembler) Base() uintptr {
	return self.pc
}
// Code returns the assembled machine code stored by the last successful
// call to Assemble. The internal buffer is returned directly, not a copy.
func (self *Assembler) Code() []byte {
	return self.buf
}
  1487. // Entry returns the address of the specified entry point, or the origin if not specified.
  1488. func (self *Assembler) Entry() uintptr {
  1489. if self.main == "" {
  1490. return self.pc
  1491. } else if tr, err := self.repo.Get(self.main); err != nil {
  1492. panic(err)
  1493. } else if val, err := tr.Evaluate(); err != nil {
  1494. panic(err)
  1495. } else {
  1496. return uintptr(val)
  1497. }
  1498. }
// Options returns a pointer to the internal options; changing the returned
// value WILL affect this Assembler instance.
func (self *Assembler) Options() *Options {
	return &self.opts
}
// WithBase resets the origin to pc and returns the Assembler itself, so
// that calls can be chained.
func (self *Assembler) WithBase(pc uintptr) *Assembler {
	self.pc = pc
	return self
}
  1508. // Assemble assembles the assembly source and save the machine code to internal buffer.
  1509. func (self *Assembler) Assemble(src string) error {
  1510. var err error
  1511. var buf []*ParsedLine
  1512. /* parse the source */
  1513. if buf, err = self.ps.Parse(src); err != nil {
  1514. return err
  1515. }
  1516. /* create a new program */
  1517. p := DefaultArch.CreateProgram()
  1518. defer p.Free()
  1519. /* process every line */
  1520. for _, self.line = range buf {
  1521. switch self.cc++; self.line.Kind {
  1522. case LineLabel : if err = self.assembleLabel (p, &self.line.Label) ; err != nil { return err }
  1523. case LineInstr : if err = self.assembleInstr (p, &self.line.Instruction) ; err != nil { return err }
  1524. case LineCommand : if err = self.assembleCommand (p, &self.line.Command) ; err != nil { return err }
  1525. default : panic("parser yields an invalid line kind")
  1526. }
  1527. }
  1528. /* assemble the program */
  1529. self.buf = p.Assemble(self.pc)
  1530. return nil
  1531. }