obj0.go 29 KB


  1. // cmd/9l/noop.c, cmd/9l/pass.c, cmd/9l/span.c from Vita Nuova.
  2. //
  3. // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
  4. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
  5. // Portions Copyright © 1997-1999 Vita Nuova Limited
  6. // Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com)
  7. // Portions Copyright © 2004,2006 Bruce Ellis
  8. // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
  9. // Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others
  10. // Portions Copyright © 2009 The Go Authors. All rights reserved.
  11. //
  12. // Permission is hereby granted, free of charge, to any person obtaining a copy
  13. // of this software and associated documentation files (the "Software"), to deal
  14. // in the Software without restriction, including without limitation the rights
  15. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  16. // copies of the Software, and to permit persons to whom the Software is
  17. // furnished to do so, subject to the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be included in
  20. // all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  25. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  26. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  27. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  28. // THE SOFTWARE.
  29. package mips
  30. import (
  31. "github.com/twitchyliquid64/golang-asm/obj"
  32. "github.com/twitchyliquid64/golang-asm/objabi"
  33. "github.com/twitchyliquid64/golang-asm/sys"
  34. "encoding/binary"
  35. "fmt"
  36. "math"
  37. )
  38. func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
  39. c := ctxt0{ctxt: ctxt, newprog: newprog}
  40. p.From.Class = 0
  41. p.To.Class = 0
  42. // Rewrite JMP/JAL to symbol as TYPE_BRANCH.
  43. switch p.As {
  44. case AJMP,
  45. AJAL,
  46. ARET,
  47. obj.ADUFFZERO,
  48. obj.ADUFFCOPY:
  49. if p.To.Sym != nil {
  50. p.To.Type = obj.TYPE_BRANCH
  51. }
  52. }
  53. // Rewrite float constants to values stored in memory.
  54. switch p.As {
  55. case AMOVF:
  56. if p.From.Type == obj.TYPE_FCONST {
  57. f32 := float32(p.From.Val.(float64))
  58. if math.Float32bits(f32) == 0 {
  59. p.As = AMOVW
  60. p.From.Type = obj.TYPE_REG
  61. p.From.Reg = REGZERO
  62. break
  63. }
  64. p.From.Type = obj.TYPE_MEM
  65. p.From.Sym = ctxt.Float32Sym(f32)
  66. p.From.Name = obj.NAME_EXTERN
  67. p.From.Offset = 0
  68. }
  69. case AMOVD:
  70. if p.From.Type == obj.TYPE_FCONST {
  71. f64 := p.From.Val.(float64)
  72. if math.Float64bits(f64) == 0 && c.ctxt.Arch.Family == sys.MIPS64 {
  73. p.As = AMOVV
  74. p.From.Type = obj.TYPE_REG
  75. p.From.Reg = REGZERO
  76. break
  77. }
  78. p.From.Type = obj.TYPE_MEM
  79. p.From.Sym = ctxt.Float64Sym(f64)
  80. p.From.Name = obj.NAME_EXTERN
  81. p.From.Offset = 0
  82. }
  83. // Put >32-bit constants in memory and load them
  84. case AMOVV:
  85. if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && int64(int32(p.From.Offset)) != p.From.Offset {
  86. p.From.Type = obj.TYPE_MEM
  87. p.From.Sym = ctxt.Int64Sym(p.From.Offset)
  88. p.From.Name = obj.NAME_EXTERN
  89. p.From.Offset = 0
  90. }
  91. }
  92. // Rewrite SUB constants into ADD.
  93. switch p.As {
  94. case ASUB:
  95. if p.From.Type == obj.TYPE_CONST {
  96. p.From.Offset = -p.From.Offset
  97. p.As = AADD
  98. }
  99. case ASUBU:
  100. if p.From.Type == obj.TYPE_CONST {
  101. p.From.Offset = -p.From.Offset
  102. p.As = AADDU
  103. }
  104. case ASUBV:
  105. if p.From.Type == obj.TYPE_CONST {
  106. p.From.Offset = -p.From.Offset
  107. p.As = AADDV
  108. }
  109. case ASUBVU:
  110. if p.From.Type == obj.TYPE_CONST {
  111. p.From.Offset = -p.From.Offset
  112. p.As = AADDVU
  113. }
  114. }
  115. }
// preprocess lowers the pseudo-instructions of one function (cursym) into
// concrete MIPS instruction sequences. In order, it:
//
//   - validates the frame size recorded on the TEXT instruction and stores the
//     argument and local sizes on cursym.Func;
//   - walks the instruction list once to mark LABEL/SYNC/BRANCH positions and
//     to clear LEAF on the TEXT instruction when a call is seen;
//   - walks it again to emit the prologue after TEXT (stack-split check, link
//     register save, SP adjustment, wrapper panic.argp fix-up), to expand RET
//     and GETCALLERPC, and to record Spadj on additions to REGSP;
//   - on 32-bit MIPS, splits every MOVD that touches memory into two MOVF;
//   - inserts a NOOP after every instruction marked BRANCH.
//
// nosched is hard-wired to true, so the function returns after the NOOP
// insertion and the scheduling loop at the bottom is currently unreachable.
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	// TODO(minux): add morestack short-cuts with small fixed frame-size.
	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym}

	// a switch for enabling/disabling instruction scheduling
	nosched := true

	// Nothing to do for a function without a TEXT instruction or without a body.
	if c.cursym.Func.Text == nil || c.cursym.Func.Text.Link == nil {
		return
	}

	p := c.cursym.Func.Text
	textstksiz := p.To.Offset
	if textstksiz == -ctxt.FixedFrameSize() {
		// Historical way to mark NOFRAME.
		p.From.Sym.Set(obj.AttrNoFrame, true)
		textstksiz = 0
	}
	if textstksiz < 0 {
		c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz)
	}
	if p.From.Sym.NoFrame() {
		if textstksiz != 0 {
			c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
		}
	}

	c.cursym.Func.Args = p.To.Val.(int32)
	c.cursym.Func.Locals = int32(textstksiz)

	/*
	 * find leaf subroutines
	 * expand RET
	 * expand BECOME pseudo
	 */
	for p := c.cursym.Func.Text; p != nil; p = p.Link {
		switch p.As {
		/* too hard, just leave alone */
		case obj.ATEXT:
			// The function starts out assumed to be a leaf; the call cases
			// below clear LEAF on the TEXT instruction.
			p.Mark |= LABEL | LEAF | SYNC
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}

		/* too hard, just leave alone */
		case AMOVW,
			AMOVV:
			// Moves to or from special registers (Reg >= REG_SPECIAL).
			if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL {
				p.Mark |= LABEL | SYNC
				break
			}
			if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL {
				p.Mark |= LABEL | SYNC
			}

		/* too hard, just leave alone */
		case ASYSCALL,
			AWORD,
			ATLBWR,
			ATLBWI,
			ATLBP,
			ATLBR:
			p.Mark |= LABEL | SYNC

		case ANOR:
			// Only a NOR whose destination is REGZERO is marked.
			if p.To.Type == obj.TYPE_REG {
				if p.To.Reg == REGZERO {
					p.Mark |= LABEL | SYNC
				}
			}

		case ABGEZAL,
			ABLTZAL,
			AJAL,
			obj.ADUFFZERO,
			obj.ADUFFCOPY:
			// Any call means the function is not a leaf.
			c.cursym.Func.Text.Mark &^= LEAF
			fallthrough

		case AJMP,
			ABEQ,
			ABGEZ,
			ABGTZ,
			ABLEZ,
			ABLTZ,
			ABNE,
			ABFPT, ABFPF:
			if p.As == ABFPT || p.As == ABFPF {
				// We don't treat ABFPT and ABFPF as branches here,
				// so that we will always fill nop (0x0) in their
				// delay slot during assembly.
				// This is to work around a kernel FPU emulator bug
				// where it uses the user stack to simulate the
				// instruction in the delay slot if it's not 0x0,
				// and somehow that leads to SIGSEGV when the kernel
				// jumps to the stack.
				p.Mark |= SYNC
			} else {
				p.Mark |= BRANCH
			}
			q1 := p.To.Target()
			if q1 != nil {
				// Retarget the branch past any NOP pseudo-instructions.
				for q1.As == obj.ANOP {
					q1 = q1.Link
					p.To.SetTarget(q1)
				}

				if q1.Mark&LEAF == 0 {
					q1.Mark |= LABEL
				}
			}
			//else {
			//	p.Mark |= LABEL
			//}
			// The instruction following a branch is also marked LABEL.
			q1 = p.Link
			if q1 != nil {
				q1.Mark |= LABEL
			}

		case ARET:
			if p.Link != nil {
				p.Link.Mark |= LABEL
			}
		}
	}

	// Pick the pointer-sized move and add opcodes for the target.
	var mov, add obj.As
	if c.ctxt.Arch.Family == sys.MIPS64 {
		add = AADDV
		mov = AMOVV
	} else {
		add = AADDU
		mov = AMOVW
	}

	var q *obj.Prog
	var q1 *obj.Prog
	autosize := int32(0)
	var p1 *obj.Prog
	var p2 *obj.Prog
	for p := c.cursym.Func.Text; p != nil; p = p.Link {
		o := p.As
		switch o {
		case obj.ATEXT:
			autosize = int32(textstksiz)

			if p.Mark&LEAF != 0 && autosize == 0 {
				// A leaf function with no locals has no frame.
				p.From.Sym.Set(obj.AttrNoFrame, true)
			}

			if !p.From.Sym.NoFrame() {
				// If there is a stack frame at all, it includes
				// space to save the LR.
				autosize += int32(c.ctxt.FixedFrameSize())
			}

			// On MIPS64, bump the frame by 4 when bit 2 of autosize is set.
			if autosize&4 != 0 && c.ctxt.Arch.Family == sys.MIPS64 {
				autosize += 4
			}

			if autosize == 0 && c.cursym.Func.Text.Mark&LEAF == 0 {
				if c.cursym.Func.Text.From.Sym.NoSplit() {
					if ctxt.Debugvlog {
						ctxt.Logf("save suppressed in: %s\n", c.cursym.Name)
					}

					c.cursym.Func.Text.Mark |= LEAF
				}
			}

			p.To.Offset = int64(autosize) - ctxt.FixedFrameSize()

			if c.cursym.Func.Text.Mark&LEAF != 0 {
				c.cursym.Set(obj.AttrLeaf, true)
				if p.From.Sym.NoFrame() {
					// Frameless leaf: no prologue at all.
					break
				}
			}

			if !p.From.Sym.NoSplit() {
				p = c.stacksplit(p, autosize) // emit split check
			}

			q = p

			if autosize != 0 {
				// Make sure to save link register for non-empty frame, even if
				// it is a leaf function, so that traceback works.
				// Store link register before decrement SP, so if a signal comes
				// during the execution of the function prologue, the traceback
				// code will not see a half-updated stack frame.
				// This sequence is not async preemptible, as if we open a frame
				// at the current SP, it will clobber the saved LR.
				q = c.ctxt.StartUnsafePoint(q, c.newprog)

				// MOV LR, -autosize(SP)
				q = obj.Appendp(q, newprog)
				q.As = mov
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REGLINK
				q.To.Type = obj.TYPE_MEM
				q.To.Offset = int64(-autosize)
				q.To.Reg = REGSP

				// ADD $-autosize, SP
				q = obj.Appendp(q, newprog)
				q.As = add
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(-autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = +autosize

				q = c.ctxt.EndUnsafePoint(q, c.newprog, -1)
			}

			if c.cursym.Func.Text.From.Sym.Wrapper() && c.cursym.Func.Text.Mark&LEAF == 0 {
				// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
				//
				//	MOV	g_panic(g), R1
				//	BEQ	R1, end
				//	MOV	panic_argp(R1), R2
				//	ADD	$(autosize+FIXED_FRAME), R29, R3
				//	BNE	R2, R3, end
				//	ADD	$FIXED_FRAME, R29, R2
				//	MOV	R2, panic_argp(R1)
				// end:
				//	NOP
				//
				// The NOP is needed to give the jumps somewhere to land.
				// It is a liblink NOP, not a MIPS NOP: it encodes to 0 instruction bytes.
				//
				// We don't generate this for leafs because that means the wrapped
				// function was inlined into the wrapper.

				q = obj.Appendp(q, newprog)

				q.As = mov
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REGG
				q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R1

				q = obj.Appendp(q, newprog)
				q.As = ABEQ
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R1
				q.To.Type = obj.TYPE_BRANCH
				q.Mark |= BRANCH
				p1 = q // first branch to "end"; target is filled in below

				q = obj.Appendp(q, newprog)
				q.As = mov
				q.From.Type = obj.TYPE_MEM
				q.From.Reg = REG_R1
				q.From.Offset = 0 // Panic.argp
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R2

				q = obj.Appendp(q, newprog)
				q.As = add
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize) + ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R3

				q = obj.Appendp(q, newprog)
				q.As = ABNE
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R2
				q.Reg = REG_R3
				q.To.Type = obj.TYPE_BRANCH
				q.Mark |= BRANCH
				p2 = q // second branch to "end"; target is filled in below

				q = obj.Appendp(q, newprog)
				q.As = add
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = ctxt.FixedFrameSize()
				q.Reg = REGSP
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REG_R2

				q = obj.Appendp(q, newprog)
				q.As = mov
				q.From.Type = obj.TYPE_REG
				q.From.Reg = REG_R2
				q.To.Type = obj.TYPE_MEM
				q.To.Reg = REG_R1
				q.To.Offset = 0 // Panic.argp

				q = obj.Appendp(q, newprog)

				q.As = obj.ANOP
				p1.To.SetTarget(q)
				p2.To.SetTarget(q)
			}

		case ARET:
			if p.From.Type == obj.TYPE_CONST {
				ctxt.Diag("using BECOME (%v) is not supported!", p)
				break
			}

			retSym := p.To.Sym
			p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction
			p.To.Sym = nil

			if c.cursym.Func.Text.Mark&LEAF != 0 {
				if autosize == 0 {
					// Frameless leaf: RET becomes a single JMP, either to
					// retSym or through the link register.
					p.As = AJMP
					p.From = obj.Addr{}
					if retSym != nil { // retjmp
						p.To.Type = obj.TYPE_BRANCH
						p.To.Name = obj.NAME_EXTERN
						p.To.Sym = retSym
					} else {
						p.To.Type = obj.TYPE_MEM
						p.To.Reg = REGLINK
						p.To.Offset = 0
					}
					p.Mark |= BRANCH
					break
				}

				// Leaf with a frame: pop the frame, then JMP (LR).
				// NOTE(review): retSym is not consulted on this path, unlike
				// the two other RET expansions; confirm that a leaf function
				// with a frame never needs a tail jump to retSym.
				p.As = add
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = int64(autosize)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = REGSP
				p.Spadj = -autosize

				q = c.newprog()
				q.As = AJMP
				q.Pos = p.Pos
				q.To.Type = obj.TYPE_MEM
				q.To.Offset = 0
				q.To.Reg = REGLINK
				q.Mark |= BRANCH
				q.Spadj = +autosize

				q.Link = p.Link
				p.Link = q
				break
			}

			// Non-leaf: reload LR from 0(SP), pop the frame if there is one,
			// then jump to retSym or through LR.
			p.As = mov
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = 0
			p.From.Reg = REGSP
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REGLINK

			if autosize != 0 {
				q = c.newprog()
				q.As = add
				q.Pos = p.Pos
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = int64(autosize)
				q.To.Type = obj.TYPE_REG
				q.To.Reg = REGSP
				q.Spadj = -autosize

				q.Link = p.Link
				p.Link = q
			}

			q1 = c.newprog()
			q1.As = AJMP
			q1.Pos = p.Pos
			if retSym != nil { // retjmp
				q1.To.Type = obj.TYPE_BRANCH
				q1.To.Name = obj.NAME_EXTERN
				q1.To.Sym = retSym
			} else {
				q1.To.Type = obj.TYPE_MEM
				q1.To.Offset = 0
				q1.To.Reg = REGLINK
			}
			q1.Mark |= BRANCH
			q1.Spadj = +autosize

			// NOTE(review): q is linked from here; when autosize == 0 it was
			// not reassigned in this case and still holds the value left by
			// an earlier case - confirm that is intended.
			q1.Link = q.Link
			q.Link = q1

		case AADD,
			AADDU,
			AADDV,
			AADDVU:
			// Record the stack pointer adjustment made by "ADD $const, SP".
			if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
				p.Spadj = int32(-p.From.Offset)
			}

		case obj.AGETCALLERPC:
			if cursym.Leaf() {
				/* MOV LR, Rd */
				p.As = mov
				p.From.Type = obj.TYPE_REG
				p.From.Reg = REGLINK
			} else {
				/* MOV (RSP), Rd */
				p.As = mov
				p.From.Type = obj.TYPE_MEM
				p.From.Reg = REGSP
			}
		}
	}

	if c.ctxt.Arch.Family == sys.MIPS {
		// rewrite MOVD into two MOVF in 32-bit mode to avoid unaligned memory access
		for p = c.cursym.Func.Text; p != nil; p = p1 {
			p1 = p.Link

			if p.As != AMOVD {
				continue
			}
			if p.From.Type != obj.TYPE_MEM && p.To.Type != obj.TYPE_MEM {
				continue
			}

			// p becomes the first MOVF; q is a copy inserted right after it.
			p.As = AMOVF
			q = c.newprog()
			*q = *p
			q.Link = p.Link
			p.Link = q
			p1 = q.Link

			var addrOff int64
			if c.ctxt.Arch.ByteOrder == binary.BigEndian {
				addrOff = 4 // swap load/save order
			}
			if p.From.Type == obj.TYPE_MEM {
				// Load: p targets the even register of the pair, q the odd one.
				reg := REG_F0 + (p.To.Reg-REG_F0)&^1
				p.To.Reg = reg
				q.To.Reg = reg + 1
				p.From.Offset += addrOff
				q.From.Offset += 4 - addrOff
			} else if p.To.Type == obj.TYPE_MEM {
				// Store: p reads the even register of the pair, q the odd one.
				reg := REG_F0 + (p.From.Reg-REG_F0)&^1
				p.From.Reg = reg
				q.From.Reg = reg + 1
				p.To.Offset += addrOff
				q.To.Offset += 4 - addrOff
			}
		}
	}

	if nosched {
		// if we don't do instruction scheduling, simply add
		// NOP after each branch instruction.
		for p = c.cursym.Func.Text; p != nil; p = p.Link {
			if p.Mark&BRANCH != 0 {
				c.addnop(p)
			}
		}
		return
	}

	// instruction scheduling
	q = nil                 // p - 1
	q1 = c.cursym.Func.Text // top of block
	o := 0                  // count of instructions
	for p = c.cursym.Func.Text; p != nil; p = p1 {
		p1 = p.Link
		o++
		if p.Mark&NOSCHED != 0 {
			// Schedule what has accumulated so far, then skip the whole
			// run of NOSCHED instructions.
			if q1 != p {
				c.sched(q1, q)
			}
			for ; p != nil; p = p.Link {
				if p.Mark&NOSCHED == 0 {
					break
				}
				q = p
			}
			p1 = p
			q1 = p
			o = 0
			continue
		}
		if p.Mark&(LABEL|SYNC) != 0 {
			// A LABEL or SYNC instruction starts a new block.
			if q1 != p {
				c.sched(q1, q)
			}
			q1 = p
			o = 1
		}
		if p.Mark&(BRANCH|SYNC) != 0 {
			// A BRANCH or SYNC instruction ends the current block.
			c.sched(q1, p)
			q1 = p1
			o = 0
		}
		if o >= NSCHED {
			// Blocks are capped at NSCHED instructions.
			c.sched(q1, p)
			q1 = p1
			o = 0
		}
		q = p
	}
}
  562. func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
  563. var mov, add, sub obj.As
  564. if c.ctxt.Arch.Family == sys.MIPS64 {
  565. add = AADDV
  566. mov = AMOVV
  567. sub = ASUBVU
  568. } else {
  569. add = AADDU
  570. mov = AMOVW
  571. sub = ASUBU
  572. }
  573. // MOV g_stackguard(g), R1
  574. p = obj.Appendp(p, c.newprog)
  575. p.As = mov
  576. p.From.Type = obj.TYPE_MEM
  577. p.From.Reg = REGG
  578. p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0
  579. if c.cursym.CFunc() {
  580. p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
  581. }
  582. p.To.Type = obj.TYPE_REG
  583. p.To.Reg = REG_R1
  584. // Mark the stack bound check and morestack call async nonpreemptible.
  585. // If we get preempted here, when resumed the preemption request is
  586. // cleared, but we'll still call morestack, which will double the stack
  587. // unnecessarily. See issue #35470.
  588. p = c.ctxt.StartUnsafePoint(p, c.newprog)
  589. var q *obj.Prog
  590. if framesize <= objabi.StackSmall {
  591. // small stack: SP < stackguard
  592. // AGTU SP, stackguard, R1
  593. p = obj.Appendp(p, c.newprog)
  594. p.As = ASGTU
  595. p.From.Type = obj.TYPE_REG
  596. p.From.Reg = REGSP
  597. p.Reg = REG_R1
  598. p.To.Type = obj.TYPE_REG
  599. p.To.Reg = REG_R1
  600. } else if framesize <= objabi.StackBig {
  601. // large stack: SP-framesize < stackguard-StackSmall
  602. // ADD $-(framesize-StackSmall), SP, R2
  603. // SGTU R2, stackguard, R1
  604. p = obj.Appendp(p, c.newprog)
  605. p.As = add
  606. p.From.Type = obj.TYPE_CONST
  607. p.From.Offset = -(int64(framesize) - objabi.StackSmall)
  608. p.Reg = REGSP
  609. p.To.Type = obj.TYPE_REG
  610. p.To.Reg = REG_R2
  611. p = obj.Appendp(p, c.newprog)
  612. p.As = ASGTU
  613. p.From.Type = obj.TYPE_REG
  614. p.From.Reg = REG_R2
  615. p.Reg = REG_R1
  616. p.To.Type = obj.TYPE_REG
  617. p.To.Reg = REG_R1
  618. } else {
  619. // Such a large stack we need to protect against wraparound.
  620. // If SP is close to zero:
  621. // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
  622. // The +StackGuard on both sides is required to keep the left side positive:
  623. // SP is allowed to be slightly below stackguard. See stack.h.
  624. //
  625. // Preemption sets stackguard to StackPreempt, a very large value.
  626. // That breaks the math above, so we have to check for that explicitly.
  627. // // stackguard is R1
  628. // MOV $StackPreempt, R2
  629. // BEQ R1, R2, label-of-call-to-morestack
  630. // ADD $StackGuard, SP, R2
  631. // SUB R1, R2
  632. // MOV $(framesize+(StackGuard-StackSmall)), R1
  633. // SGTU R2, R1, R1
  634. p = obj.Appendp(p, c.newprog)
  635. p.As = mov
  636. p.From.Type = obj.TYPE_CONST
  637. p.From.Offset = objabi.StackPreempt
  638. p.To.Type = obj.TYPE_REG
  639. p.To.Reg = REG_R2
  640. p = obj.Appendp(p, c.newprog)
  641. q = p
  642. p.As = ABEQ
  643. p.From.Type = obj.TYPE_REG
  644. p.From.Reg = REG_R1
  645. p.Reg = REG_R2
  646. p.To.Type = obj.TYPE_BRANCH
  647. p.Mark |= BRANCH
  648. p = obj.Appendp(p, c.newprog)
  649. p.As = add
  650. p.From.Type = obj.TYPE_CONST
  651. p.From.Offset = int64(objabi.StackGuard)
  652. p.Reg = REGSP
  653. p.To.Type = obj.TYPE_REG
  654. p.To.Reg = REG_R2
  655. p = obj.Appendp(p, c.newprog)
  656. p.As = sub
  657. p.From.Type = obj.TYPE_REG
  658. p.From.Reg = REG_R1
  659. p.To.Type = obj.TYPE_REG
  660. p.To.Reg = REG_R2
  661. p = obj.Appendp(p, c.newprog)
  662. p.As = mov
  663. p.From.Type = obj.TYPE_CONST
  664. p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall
  665. p.To.Type = obj.TYPE_REG
  666. p.To.Reg = REG_R1
  667. p = obj.Appendp(p, c.newprog)
  668. p.As = ASGTU
  669. p.From.Type = obj.TYPE_REG
  670. p.From.Reg = REG_R2
  671. p.Reg = REG_R1
  672. p.To.Type = obj.TYPE_REG
  673. p.To.Reg = REG_R1
  674. }
  675. // q1: BNE R1, done
  676. p = obj.Appendp(p, c.newprog)
  677. q1 := p
  678. p.As = ABNE
  679. p.From.Type = obj.TYPE_REG
  680. p.From.Reg = REG_R1
  681. p.To.Type = obj.TYPE_BRANCH
  682. p.Mark |= BRANCH
  683. // MOV LINK, R3
  684. p = obj.Appendp(p, c.newprog)
  685. p.As = mov
  686. p.From.Type = obj.TYPE_REG
  687. p.From.Reg = REGLINK
  688. p.To.Type = obj.TYPE_REG
  689. p.To.Reg = REG_R3
  690. if q != nil {
  691. q.To.SetTarget(p)
  692. p.Mark |= LABEL
  693. }
  694. p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog)
  695. // JAL runtime.morestack(SB)
  696. p = obj.Appendp(p, c.newprog)
  697. p.As = AJAL
  698. p.To.Type = obj.TYPE_BRANCH
  699. if c.cursym.CFunc() {
  700. p.To.Sym = c.ctxt.Lookup("runtime.morestackc")
  701. } else if !c.cursym.Func.Text.From.Sym.NeedCtxt() {
  702. p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt")
  703. } else {
  704. p.To.Sym = c.ctxt.Lookup("runtime.morestack")
  705. }
  706. p.Mark |= BRANCH
  707. p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)
  708. // JMP start
  709. p = obj.Appendp(p, c.newprog)
  710. p.As = AJMP
  711. p.To.Type = obj.TYPE_BRANCH
  712. p.To.SetTarget(c.cursym.Func.Text.Link)
  713. p.Mark |= BRANCH
  714. // placeholder for q1's jump target
  715. p = obj.Appendp(p, c.newprog)
  716. p.As = obj.ANOP // zero-width place holder
  717. q1.To.SetTarget(p)
  718. return p
  719. }
  720. func (c *ctxt0) addnop(p *obj.Prog) {
  721. q := c.newprog()
  722. q.As = ANOOP
  723. q.Pos = p.Pos
  724. q.Link = p.Link
  725. p.Link = q
  726. }
  727. const (
  728. E_HILO = 1 << 0
  729. E_FCR = 1 << 1
  730. E_MCR = 1 << 2
  731. E_MEM = 1 << 3
  732. E_MEMSP = 1 << 4 /* uses offset and size */
  733. E_MEMSB = 1 << 5 /* uses offset and size */
  734. ANYMEM = E_MEM | E_MEMSP | E_MEMSB
  735. //DELAY = LOAD|BRANCH|FCMP
  736. DELAY = BRANCH /* only schedule branch */
  737. )
  738. type Dep struct {
  739. ireg uint32
  740. freg uint32
  741. cc uint32
  742. }
  743. type Sch struct {
  744. p obj.Prog
  745. set Dep
  746. used Dep
  747. soffset int32
  748. size uint8
  749. nop uint8
  750. comp bool
  751. }
  752. func (c *ctxt0) sched(p0, pe *obj.Prog) {
  753. var sch [NSCHED]Sch
  754. /*
  755. * build side structure
  756. */
  757. s := sch[:]
  758. for p := p0; ; p = p.Link {
  759. s[0].p = *p
  760. c.markregused(&s[0])
  761. if p == pe {
  762. break
  763. }
  764. s = s[1:]
  765. }
  766. se := s
  767. for i := cap(sch) - cap(se); i >= 0; i-- {
  768. s = sch[i:]
  769. if s[0].p.Mark&DELAY == 0 {
  770. continue
  771. }
  772. if -cap(s) < -cap(se) {
  773. if !conflict(&s[0], &s[1]) {
  774. continue
  775. }
  776. }
  777. var t []Sch
  778. var j int
  779. for j = cap(sch) - cap(s) - 1; j >= 0; j-- {
  780. t = sch[j:]
  781. if t[0].comp {
  782. if s[0].p.Mark&BRANCH != 0 {
  783. continue
  784. }
  785. }
  786. if t[0].p.Mark&DELAY != 0 {
  787. if -cap(s) >= -cap(se) || conflict(&t[0], &s[1]) {
  788. continue
  789. }
  790. }
  791. for u := t[1:]; -cap(u) <= -cap(s); u = u[1:] {
  792. if c.depend(&u[0], &t[0]) {
  793. continue
  794. }
  795. }
  796. goto out2
  797. }
  798. if s[0].p.Mark&BRANCH != 0 {
  799. s[0].nop = 1
  800. }
  801. continue
  802. out2:
  803. // t[0] is the instruction being moved to fill the delay
  804. stmp := t[0]
  805. copy(t[:i-j], t[1:i-j+1])
  806. s[0] = stmp
  807. if t[i-j-1].p.Mark&BRANCH != 0 {
  808. // t[i-j] is being put into a branch delay slot
  809. // combine its Spadj with the branch instruction
  810. t[i-j-1].p.Spadj += t[i-j].p.Spadj
  811. t[i-j].p.Spadj = 0
  812. }
  813. i--
  814. }
  815. /*
  816. * put it all back
  817. */
  818. var p *obj.Prog
  819. var q *obj.Prog
  820. for s, p = sch[:], p0; -cap(s) <= -cap(se); s, p = s[1:], q {
  821. q = p.Link
  822. if q != s[0].p.Link {
  823. *p = s[0].p
  824. p.Link = q
  825. }
  826. for s[0].nop != 0 {
  827. s[0].nop--
  828. c.addnop(p)
  829. }
  830. }
  831. }
  832. func (c *ctxt0) markregused(s *Sch) {
  833. p := &s.p
  834. s.comp = c.compound(p)
  835. s.nop = 0
  836. if s.comp {
  837. s.set.ireg |= 1 << (REGTMP - REG_R0)
  838. s.used.ireg |= 1 << (REGTMP - REG_R0)
  839. }
  840. ar := 0 /* dest is really reference */
  841. ad := 0 /* source/dest is really address */
  842. ld := 0 /* opcode is load instruction */
  843. sz := 20 /* size of load/store for overlap computation */
  844. /*
  845. * flags based on opcode
  846. */
  847. switch p.As {
  848. case obj.ATEXT:
  849. c.autosize = int32(p.To.Offset + 8)
  850. ad = 1
  851. case AJAL:
  852. r := p.Reg
  853. if r == 0 {
  854. r = REGLINK
  855. }
  856. s.set.ireg |= 1 << uint(r-REG_R0)
  857. ar = 1
  858. ad = 1
  859. case ABGEZAL,
  860. ABLTZAL:
  861. s.set.ireg |= 1 << (REGLINK - REG_R0)
  862. fallthrough
  863. case ABEQ,
  864. ABGEZ,
  865. ABGTZ,
  866. ABLEZ,
  867. ABLTZ,
  868. ABNE:
  869. ar = 1
  870. ad = 1
  871. case ABFPT,
  872. ABFPF:
  873. ad = 1
  874. s.used.cc |= E_FCR
  875. case ACMPEQD,
  876. ACMPEQF,
  877. ACMPGED,
  878. ACMPGEF,
  879. ACMPGTD,
  880. ACMPGTF:
  881. ar = 1
  882. s.set.cc |= E_FCR
  883. p.Mark |= FCMP
  884. case AJMP:
  885. ar = 1
  886. ad = 1
  887. case AMOVB,
  888. AMOVBU:
  889. sz = 1
  890. ld = 1
  891. case AMOVH,
  892. AMOVHU:
  893. sz = 2
  894. ld = 1
  895. case AMOVF,
  896. AMOVW,
  897. AMOVWL,
  898. AMOVWR:
  899. sz = 4
  900. ld = 1
  901. case AMOVD,
  902. AMOVV,
  903. AMOVVL,
  904. AMOVVR:
  905. sz = 8
  906. ld = 1
  907. case ADIV,
  908. ADIVU,
  909. AMUL,
  910. AMULU,
  911. AREM,
  912. AREMU,
  913. ADIVV,
  914. ADIVVU,
  915. AMULV,
  916. AMULVU,
  917. AREMV,
  918. AREMVU:
  919. s.set.cc = E_HILO
  920. fallthrough
  921. case AADD,
  922. AADDU,
  923. AADDV,
  924. AADDVU,
  925. AAND,
  926. ANOR,
  927. AOR,
  928. ASGT,
  929. ASGTU,
  930. ASLL,
  931. ASRA,
  932. ASRL,
  933. ASLLV,
  934. ASRAV,
  935. ASRLV,
  936. ASUB,
  937. ASUBU,
  938. ASUBV,
  939. ASUBVU,
  940. AXOR,
  941. AADDD,
  942. AADDF,
  943. AADDW,
  944. ASUBD,
  945. ASUBF,
  946. ASUBW,
  947. AMULF,
  948. AMULD,
  949. AMULW,
  950. ADIVF,
  951. ADIVD,
  952. ADIVW:
  953. if p.Reg == 0 {
  954. if p.To.Type == obj.TYPE_REG {
  955. p.Reg = p.To.Reg
  956. }
  957. //if(p->reg == NREG)
  958. // print("botch %P\n", p);
  959. }
  960. }
  961. /*
  962. * flags based on 'to' field
  963. */
  964. cls := int(p.To.Class)
  965. if cls == 0 {
  966. cls = c.aclass(&p.To) + 1
  967. p.To.Class = int8(cls)
  968. }
  969. cls--
  970. switch cls {
  971. default:
  972. fmt.Printf("unknown class %d %v\n", cls, p)
  973. case C_ZCON,
  974. C_SCON,
  975. C_ADD0CON,
  976. C_AND0CON,
  977. C_ADDCON,
  978. C_ANDCON,
  979. C_UCON,
  980. C_LCON,
  981. C_NONE,
  982. C_SBRA,
  983. C_LBRA,
  984. C_ADDR,
  985. C_TEXTSIZE:
  986. break
  987. case C_HI,
  988. C_LO:
  989. s.set.cc |= E_HILO
  990. case C_FCREG:
  991. s.set.cc |= E_FCR
  992. case C_MREG:
  993. s.set.cc |= E_MCR
  994. case C_ZOREG,
  995. C_SOREG,
  996. C_LOREG:
  997. cls = int(p.To.Reg)
  998. s.used.ireg |= 1 << uint(cls-REG_R0)
  999. if ad != 0 {
  1000. break
  1001. }
  1002. s.size = uint8(sz)
  1003. s.soffset = c.regoff(&p.To)
  1004. m := uint32(ANYMEM)
  1005. if cls == REGSB {
  1006. m = E_MEMSB
  1007. }
  1008. if cls == REGSP {
  1009. m = E_MEMSP
  1010. }
  1011. if ar != 0 {
  1012. s.used.cc |= m
  1013. } else {
  1014. s.set.cc |= m
  1015. }
  1016. case C_SACON,
  1017. C_LACON:
  1018. s.used.ireg |= 1 << (REGSP - REG_R0)
  1019. case C_SECON,
  1020. C_LECON:
  1021. s.used.ireg |= 1 << (REGSB - REG_R0)
  1022. case C_REG:
  1023. if ar != 0 {
  1024. s.used.ireg |= 1 << uint(p.To.Reg-REG_R0)
  1025. } else {
  1026. s.set.ireg |= 1 << uint(p.To.Reg-REG_R0)
  1027. }
  1028. case C_FREG:
  1029. if ar != 0 {
  1030. s.used.freg |= 1 << uint(p.To.Reg-REG_F0)
  1031. } else {
  1032. s.set.freg |= 1 << uint(p.To.Reg-REG_F0)
  1033. }
  1034. if ld != 0 && p.From.Type == obj.TYPE_REG {
  1035. p.Mark |= LOAD
  1036. }
  1037. case C_SAUTO,
  1038. C_LAUTO:
  1039. s.used.ireg |= 1 << (REGSP - REG_R0)
  1040. if ad != 0 {
  1041. break
  1042. }
  1043. s.size = uint8(sz)
  1044. s.soffset = c.regoff(&p.To)
  1045. if ar != 0 {
  1046. s.used.cc |= E_MEMSP
  1047. } else {
  1048. s.set.cc |= E_MEMSP
  1049. }
  1050. case C_SEXT,
  1051. C_LEXT:
  1052. s.used.ireg |= 1 << (REGSB - REG_R0)
  1053. if ad != 0 {
  1054. break
  1055. }
  1056. s.size = uint8(sz)
  1057. s.soffset = c.regoff(&p.To)
  1058. if ar != 0 {
  1059. s.used.cc |= E_MEMSB
  1060. } else {
  1061. s.set.cc |= E_MEMSB
  1062. }
  1063. }
  1064. /*
  1065. * flags based on 'from' field
  1066. */
  1067. cls = int(p.From.Class)
  1068. if cls == 0 {
  1069. cls = c.aclass(&p.From) + 1
  1070. p.From.Class = int8(cls)
  1071. }
  1072. cls--
  1073. switch cls {
  1074. default:
  1075. fmt.Printf("unknown class %d %v\n", cls, p)
  1076. case C_ZCON,
  1077. C_SCON,
  1078. C_ADD0CON,
  1079. C_AND0CON,
  1080. C_ADDCON,
  1081. C_ANDCON,
  1082. C_UCON,
  1083. C_LCON,
  1084. C_NONE,
  1085. C_SBRA,
  1086. C_LBRA,
  1087. C_ADDR,
  1088. C_TEXTSIZE:
  1089. break
  1090. case C_HI,
  1091. C_LO:
  1092. s.used.cc |= E_HILO
  1093. case C_FCREG:
  1094. s.used.cc |= E_FCR
  1095. case C_MREG:
  1096. s.used.cc |= E_MCR
  1097. case C_ZOREG,
  1098. C_SOREG,
  1099. C_LOREG:
  1100. cls = int(p.From.Reg)
  1101. s.used.ireg |= 1 << uint(cls-REG_R0)
  1102. if ld != 0 {
  1103. p.Mark |= LOAD
  1104. }
  1105. s.size = uint8(sz)
  1106. s.soffset = c.regoff(&p.From)
  1107. m := uint32(ANYMEM)
  1108. if cls == REGSB {
  1109. m = E_MEMSB
  1110. }
  1111. if cls == REGSP {
  1112. m = E_MEMSP
  1113. }
  1114. s.used.cc |= m
  1115. case C_SACON,
  1116. C_LACON:
  1117. cls = int(p.From.Reg)
  1118. if cls == 0 {
  1119. cls = REGSP
  1120. }
  1121. s.used.ireg |= 1 << uint(cls-REG_R0)
  1122. case C_SECON,
  1123. C_LECON:
  1124. s.used.ireg |= 1 << (REGSB - REG_R0)
  1125. case C_REG:
  1126. s.used.ireg |= 1 << uint(p.From.Reg-REG_R0)
  1127. case C_FREG:
  1128. s.used.freg |= 1 << uint(p.From.Reg-REG_F0)
  1129. if ld != 0 && p.To.Type == obj.TYPE_REG {
  1130. p.Mark |= LOAD
  1131. }
  1132. case C_SAUTO,
  1133. C_LAUTO:
  1134. s.used.ireg |= 1 << (REGSP - REG_R0)
  1135. if ld != 0 {
  1136. p.Mark |= LOAD
  1137. }
  1138. if ad != 0 {
  1139. break
  1140. }
  1141. s.size = uint8(sz)
  1142. s.soffset = c.regoff(&p.From)
  1143. s.used.cc |= E_MEMSP
  1144. case C_SEXT:
  1145. case C_LEXT:
  1146. s.used.ireg |= 1 << (REGSB - REG_R0)
  1147. if ld != 0 {
  1148. p.Mark |= LOAD
  1149. }
  1150. if ad != 0 {
  1151. break
  1152. }
  1153. s.size = uint8(sz)
  1154. s.soffset = c.regoff(&p.From)
  1155. s.used.cc |= E_MEMSB
  1156. }
  1157. cls = int(p.Reg)
  1158. if cls != 0 {
  1159. if REG_F0 <= cls && cls <= REG_F31 {
  1160. s.used.freg |= 1 << uint(cls-REG_F0)
  1161. } else {
  1162. s.used.ireg |= 1 << uint(cls-REG_R0)
  1163. }
  1164. }
  1165. s.set.ireg &^= (1 << (REGZERO - REG_R0)) /* R0 can't be set */
  1166. }
  1167. /*
  1168. * test to see if two instructions can be
  1169. * interchanged without changing semantics
  1170. */
  1171. func (c *ctxt0) depend(sa, sb *Sch) bool {
  1172. if sa.set.ireg&(sb.set.ireg|sb.used.ireg) != 0 {
  1173. return true
  1174. }
  1175. if sb.set.ireg&sa.used.ireg != 0 {
  1176. return true
  1177. }
  1178. if sa.set.freg&(sb.set.freg|sb.used.freg) != 0 {
  1179. return true
  1180. }
  1181. if sb.set.freg&sa.used.freg != 0 {
  1182. return true
  1183. }
  1184. /*
  1185. * special case.
  1186. * loads from same address cannot pass.
  1187. * this is for hardware fifo's and the like
  1188. */
  1189. if sa.used.cc&sb.used.cc&E_MEM != 0 {
  1190. if sa.p.Reg == sb.p.Reg {
  1191. if c.regoff(&sa.p.From) == c.regoff(&sb.p.From) {
  1192. return true
  1193. }
  1194. }
  1195. }
  1196. x := (sa.set.cc & (sb.set.cc | sb.used.cc)) | (sb.set.cc & sa.used.cc)
  1197. if x != 0 {
  1198. /*
  1199. * allow SB and SP to pass each other.
  1200. * allow SB to pass SB iff doffsets are ok
  1201. * anything else conflicts
  1202. */
  1203. if x != E_MEMSP && x != E_MEMSB {
  1204. return true
  1205. }
  1206. x = sa.set.cc | sb.set.cc | sa.used.cc | sb.used.cc
  1207. if x&E_MEM != 0 {
  1208. return true
  1209. }
  1210. if offoverlap(sa, sb) {
  1211. return true
  1212. }
  1213. }
  1214. return false
  1215. }
  1216. func offoverlap(sa, sb *Sch) bool {
  1217. if sa.soffset < sb.soffset {
  1218. if sa.soffset+int32(sa.size) > sb.soffset {
  1219. return true
  1220. }
  1221. return false
  1222. }
  1223. if sb.soffset+int32(sb.size) > sa.soffset {
  1224. return true
  1225. }
  1226. return false
  1227. }
  1228. /*
  1229. * test 2 adjacent instructions
  1230. * and find out if inserted instructions
  1231. * are desired to prevent stalls.
  1232. */
  1233. func conflict(sa, sb *Sch) bool {
  1234. if sa.set.ireg&sb.used.ireg != 0 {
  1235. return true
  1236. }
  1237. if sa.set.freg&sb.used.freg != 0 {
  1238. return true
  1239. }
  1240. if sa.set.cc&sb.used.cc != 0 {
  1241. return true
  1242. }
  1243. return false
  1244. }
  1245. func (c *ctxt0) compound(p *obj.Prog) bool {
  1246. o := c.oplook(p)
  1247. if o.size != 4 {
  1248. return true
  1249. }
  1250. if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSB {
  1251. return true
  1252. }
  1253. return false
  1254. }
  1255. var Linkmips64 = obj.LinkArch{
  1256. Arch: sys.ArchMIPS64,
  1257. Init: buildop,
  1258. Preprocess: preprocess,
  1259. Assemble: span0,
  1260. Progedit: progedit,
  1261. DWARFRegisters: MIPSDWARFRegisters,
  1262. }
  1263. var Linkmips64le = obj.LinkArch{
  1264. Arch: sys.ArchMIPS64LE,
  1265. Init: buildop,
  1266. Preprocess: preprocess,
  1267. Assemble: span0,
  1268. Progedit: progedit,
  1269. DWARFRegisters: MIPSDWARFRegisters,
  1270. }
  1271. var Linkmips = obj.LinkArch{
  1272. Arch: sys.ArchMIPS,
  1273. Init: buildop,
  1274. Preprocess: preprocess,
  1275. Assemble: span0,
  1276. Progedit: progedit,
  1277. DWARFRegisters: MIPSDWARFRegisters,
  1278. }
  1279. var Linkmipsle = obj.LinkArch{
  1280. Arch: sys.ArchMIPSLE,
  1281. Init: buildop,
  1282. Preprocess: preprocess,
  1283. Assemble: span0,
  1284. Progedit: progedit,
  1285. DWARFRegisters: MIPSDWARFRegisters,
  1286. }