asm6.go 146 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446
  1. // Inferno utils/6l/span.c
  2. // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
  3. //
  4. // Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
  5. // Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
  6. // Portions Copyright © 1997-1999 Vita Nuova Limited
  7. // Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
  8. // Portions Copyright © 2004,2006 Bruce Ellis
  9. // Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
  10. // Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
  11. // Portions Copyright © 2009 The Go Authors. All rights reserved.
  12. //
  13. // Permission is hereby granted, free of charge, to any person obtaining a copy
  14. // of this software and associated documentation files (the "Software"), to deal
  15. // in the Software without restriction, including without limitation the rights
  16. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  17. // copies of the Software, and to permit persons to whom the Software is
  18. // furnished to do so, subject to the following conditions:
  19. //
  20. // The above copyright notice and this permission notice shall be included in
  21. // all copies or substantial portions of the Software.
  22. //
  23. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  24. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  25. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  26. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  27. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  28. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  29. // THE SOFTWARE.
  30. package x86
  31. import (
  32. "github.com/twitchyliquid64/golang-asm/obj"
  33. "github.com/twitchyliquid64/golang-asm/objabi"
  34. "github.com/twitchyliquid64/golang-asm/sys"
  35. "encoding/binary"
  36. "fmt"
  37. "log"
  38. "strings"
  39. )
  40. var (
  41. plan9privates *obj.LSym
  42. deferreturn *obj.LSym
  43. )
  44. // Instruction layout.
  45. // Loop alignment constants:
  46. // want to align loop entry to loopAlign-byte boundary,
  47. // and willing to insert at most maxLoopPad bytes of NOP to do so.
  48. // We define a loop entry as the target of a backward jump.
  49. //
  50. // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
  51. // and it aligns all jump targets, not just backward jump targets.
  52. //
  53. // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
  54. // is very slight but negative, so the alignment is disabled by
  55. // setting MaxLoopPad = 0. The code is here for reference and
  56. // for future experiments.
  57. //
  58. const (
  59. loopAlign = 16
  60. maxLoopPad = 0
  61. )
  62. // Bit flags that are used to express jump target properties.
  63. const (
  64. // branchBackwards marks targets that are located behind.
  65. // Used to express jumps to loop headers.
  66. branchBackwards = (1 << iota)
  67. // branchShort marks branches those target is close,
  68. // with offset is in -128..127 range.
  69. branchShort
  70. // branchLoopHead marks loop entry.
  71. // Used to insert padding for misaligned loops.
  72. branchLoopHead
  73. )
  74. // opBytes holds optab encoding bytes.
  75. // Each ytab reserves fixed amount of bytes in this array.
  76. //
  77. // The size should be the minimal number of bytes that
  78. // are enough to hold biggest optab op lines.
  79. type opBytes [31]uint8
  80. type Optab struct {
  81. as obj.As
  82. ytab []ytab
  83. prefix uint8
  84. op opBytes
  85. }
  86. type movtab struct {
  87. as obj.As
  88. ft uint8
  89. f3t uint8
  90. tt uint8
  91. code uint8
  92. op [4]uint8
  93. }
  94. const (
  95. Yxxx = iota
  96. Ynone
  97. Yi0 // $0
  98. Yi1 // $1
  99. Yu2 // $x, x fits in uint2
  100. Yi8 // $x, x fits in int8
  101. Yu8 // $x, x fits in uint8
  102. Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
  103. Ys32
  104. Yi32
  105. Yi64
  106. Yiauto
  107. Yal
  108. Ycl
  109. Yax
  110. Ycx
  111. Yrb
  112. Yrl
  113. Yrl32 // Yrl on 32-bit system
  114. Yrf
  115. Yf0
  116. Yrx
  117. Ymb
  118. Yml
  119. Ym
  120. Ybr
  121. Ycs
  122. Yss
  123. Yds
  124. Yes
  125. Yfs
  126. Ygs
  127. Ygdtr
  128. Yidtr
  129. Yldtr
  130. Ymsw
  131. Ytask
  132. Ycr0
  133. Ycr1
  134. Ycr2
  135. Ycr3
  136. Ycr4
  137. Ycr5
  138. Ycr6
  139. Ycr7
  140. Ycr8
  141. Ydr0
  142. Ydr1
  143. Ydr2
  144. Ydr3
  145. Ydr4
  146. Ydr5
  147. Ydr6
  148. Ydr7
  149. Ytr0
  150. Ytr1
  151. Ytr2
  152. Ytr3
  153. Ytr4
  154. Ytr5
  155. Ytr6
  156. Ytr7
  157. Ymr
  158. Ymm
  159. Yxr0 // X0 only. "<XMM0>" notation in Intel manual.
  160. YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
  161. Yxr // X0..X15
  162. YxrEvex // X0..X31
  163. Yxm
  164. YxmEvex // YxrEvex+Ym
  165. Yxvm // VSIB vector array; vm32x/vm64x
  166. YxvmEvex // Yxvm which permits High-16 X register as index.
  167. YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
  168. Yyr // Y0..Y15
  169. YyrEvex // Y0..Y31
  170. Yym
  171. YymEvex // YyrEvex+Ym
  172. Yyvm // VSIB vector array; vm32y/vm64y
  173. YyvmEvex // Yyvm which permits High-16 Y register as index.
  174. YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
  175. Yzr // Z0..Z31
  176. Yzm // Yzr+Ym
  177. Yzvm // VSIB vector array; vm32z/vm64z
  178. Yk0 // K0
  179. Yknot0 // K1..K7; write mask
  180. Yk // K0..K7; used for KOP
  181. Ykm // Yk+Ym; used for KOP
  182. Ytls
  183. Ytextsize
  184. Yindir
  185. Ymax
  186. )
  187. const (
  188. Zxxx = iota
  189. Zlit
  190. Zlitm_r
  191. Zlitr_m
  192. Zlit_m_r
  193. Z_rp
  194. Zbr
  195. Zcall
  196. Zcallcon
  197. Zcallduff
  198. Zcallind
  199. Zcallindreg
  200. Zib_
  201. Zib_rp
  202. Zibo_m
  203. Zibo_m_xm
  204. Zil_
  205. Zil_rp
  206. Ziq_rp
  207. Zilo_m
  208. Zjmp
  209. Zjmpcon
  210. Zloop
  211. Zo_iw
  212. Zm_o
  213. Zm_r
  214. Z_m_r
  215. Zm2_r
  216. Zm_r_xm
  217. Zm_r_i_xm
  218. Zm_r_xm_nr
  219. Zr_m_xm_nr
  220. Zibm_r // mmx1,mmx2/mem64,imm8
  221. Zibr_m
  222. Zmb_r
  223. Zaut_r
  224. Zo_m
  225. Zo_m64
  226. Zpseudo
  227. Zr_m
  228. Zr_m_xm
  229. Zrp_
  230. Z_ib
  231. Z_il
  232. Zm_ibo
  233. Zm_ilo
  234. Zib_rr
  235. Zil_rr
  236. Zbyte
  237. Zvex_rm_v_r
  238. Zvex_rm_v_ro
  239. Zvex_r_v_rm
  240. Zvex_i_rm_vo
  241. Zvex_v_rm_r
  242. Zvex_i_rm_r
  243. Zvex_i_r_v
  244. Zvex_i_rm_v_r
  245. Zvex
  246. Zvex_rm_r_vo
  247. Zvex_i_r_rm
  248. Zvex_hr_rm_v_r
  249. Zevex_first
  250. Zevex_i_r_k_rm
  251. Zevex_i_r_rm
  252. Zevex_i_rm_k_r
  253. Zevex_i_rm_k_vo
  254. Zevex_i_rm_r
  255. Zevex_i_rm_v_k_r
  256. Zevex_i_rm_v_r
  257. Zevex_i_rm_vo
  258. Zevex_k_rmo
  259. Zevex_r_k_rm
  260. Zevex_r_v_k_rm
  261. Zevex_r_v_rm
  262. Zevex_rm_k_r
  263. Zevex_rm_v_k_r
  264. Zevex_rm_v_r
  265. Zevex_last
  266. Zmax
  267. )
  268. const (
  269. Px = 0
  270. Px1 = 1 // symbolic; exact value doesn't matter
  271. P32 = 0x32 // 32-bit only
  272. Pe = 0x66 // operand escape
  273. Pm = 0x0f // 2byte opcode escape
  274. Pq = 0xff // both escapes: 66 0f
  275. Pb = 0xfe // byte operands
  276. Pf2 = 0xf2 // xmm escape 1: f2 0f
  277. Pf3 = 0xf3 // xmm escape 2: f3 0f
  278. Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
  279. Pq3 = 0x67 // xmm escape 3: 66 48 0f
  280. Pq4 = 0x68 // xmm escape 4: 66 0F 38
  281. Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
  282. Pq5 = 0x6a // xmm escape 5: F3 0F 38
  283. Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
  284. Pfw = 0xf4 // Pf3 with Rex.w: f3 48 0f
  285. Pw = 0x48 // Rex.w
  286. Pw8 = 0x90 // symbolic; exact value doesn't matter
  287. Py = 0x80 // defaults to 64-bit mode
  288. Py1 = 0x81 // symbolic; exact value doesn't matter
  289. Py3 = 0x83 // symbolic; exact value doesn't matter
  290. Pavx = 0x84 // symbolic: exact value doesn't matter
  291. RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
  292. Rxw = 1 << 3 // =1, 64-bit operand size
  293. Rxr = 1 << 2 // extend modrm reg
  294. Rxx = 1 << 1 // extend sib index
  295. Rxb = 1 << 0 // extend modrm r/m, sib base, or opcode reg
  296. )
  297. const (
  298. // Encoding for VEX prefix in tables.
  299. // The P, L, and W fields are chosen to match
  300. // their eventual locations in the VEX prefix bytes.
  301. // Encoding for VEX prefix in tables.
  302. // The P, L, and W fields are chosen to match
  303. // their eventual locations in the VEX prefix bytes.
  304. // Using spare bit to make leading [E]VEX encoding byte different from
  305. // 0x0f even if all other VEX fields are 0.
  306. avxEscape = 1 << 6
  307. // P field - 2 bits
  308. vex66 = 1 << 0
  309. vexF3 = 2 << 0
  310. vexF2 = 3 << 0
  311. // L field - 1 bit
  312. vexLZ = 0 << 2
  313. vexLIG = 0 << 2
  314. vex128 = 0 << 2
  315. vex256 = 1 << 2
  316. // W field - 1 bit
  317. vexWIG = 0 << 7
  318. vexW0 = 0 << 7
  319. vexW1 = 1 << 7
  320. // M field - 5 bits, but mostly reserved; we can store up to 3
  321. vex0F = 1 << 3
  322. vex0F38 = 2 << 3
  323. vex0F3A = 3 << 3
  324. )
  325. var ycover [Ymax * Ymax]uint8
  326. var reg [MAXREG]int
  327. var regrex [MAXREG + 1]int
  328. var ynone = []ytab{
  329. {Zlit, 1, argList{}},
  330. }
  331. var ytext = []ytab{
  332. {Zpseudo, 0, argList{Ymb, Ytextsize}},
  333. {Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
  334. }
  335. var ynop = []ytab{
  336. {Zpseudo, 0, argList{}},
  337. {Zpseudo, 0, argList{Yiauto}},
  338. {Zpseudo, 0, argList{Yml}},
  339. {Zpseudo, 0, argList{Yrf}},
  340. {Zpseudo, 0, argList{Yxr}},
  341. {Zpseudo, 0, argList{Yiauto}},
  342. {Zpseudo, 0, argList{Yml}},
  343. {Zpseudo, 0, argList{Yrf}},
  344. {Zpseudo, 1, argList{Yxr}},
  345. }
  346. var yfuncdata = []ytab{
  347. {Zpseudo, 0, argList{Yi32, Ym}},
  348. }
  349. var ypcdata = []ytab{
  350. {Zpseudo, 0, argList{Yi32, Yi32}},
  351. }
  352. var yxorb = []ytab{
  353. {Zib_, 1, argList{Yi32, Yal}},
  354. {Zibo_m, 2, argList{Yi32, Ymb}},
  355. {Zr_m, 1, argList{Yrb, Ymb}},
  356. {Zm_r, 1, argList{Ymb, Yrb}},
  357. }
  358. var yaddl = []ytab{
  359. {Zibo_m, 2, argList{Yi8, Yml}},
  360. {Zil_, 1, argList{Yi32, Yax}},
  361. {Zilo_m, 2, argList{Yi32, Yml}},
  362. {Zr_m, 1, argList{Yrl, Yml}},
  363. {Zm_r, 1, argList{Yml, Yrl}},
  364. }
  365. var yincl = []ytab{
  366. {Z_rp, 1, argList{Yrl}},
  367. {Zo_m, 2, argList{Yml}},
  368. }
  369. var yincq = []ytab{
  370. {Zo_m, 2, argList{Yml}},
  371. }
  372. var ycmpb = []ytab{
  373. {Z_ib, 1, argList{Yal, Yi32}},
  374. {Zm_ibo, 2, argList{Ymb, Yi32}},
  375. {Zm_r, 1, argList{Ymb, Yrb}},
  376. {Zr_m, 1, argList{Yrb, Ymb}},
  377. }
  378. var ycmpl = []ytab{
  379. {Zm_ibo, 2, argList{Yml, Yi8}},
  380. {Z_il, 1, argList{Yax, Yi32}},
  381. {Zm_ilo, 2, argList{Yml, Yi32}},
  382. {Zm_r, 1, argList{Yml, Yrl}},
  383. {Zr_m, 1, argList{Yrl, Yml}},
  384. }
  385. var yshb = []ytab{
  386. {Zo_m, 2, argList{Yi1, Ymb}},
  387. {Zibo_m, 2, argList{Yu8, Ymb}},
  388. {Zo_m, 2, argList{Ycx, Ymb}},
  389. }
  390. var yshl = []ytab{
  391. {Zo_m, 2, argList{Yi1, Yml}},
  392. {Zibo_m, 2, argList{Yu8, Yml}},
  393. {Zo_m, 2, argList{Ycl, Yml}},
  394. {Zo_m, 2, argList{Ycx, Yml}},
  395. }
  396. var ytestl = []ytab{
  397. {Zil_, 1, argList{Yi32, Yax}},
  398. {Zilo_m, 2, argList{Yi32, Yml}},
  399. {Zr_m, 1, argList{Yrl, Yml}},
  400. {Zm_r, 1, argList{Yml, Yrl}},
  401. }
  402. var ymovb = []ytab{
  403. {Zr_m, 1, argList{Yrb, Ymb}},
  404. {Zm_r, 1, argList{Ymb, Yrb}},
  405. {Zib_rp, 1, argList{Yi32, Yrb}},
  406. {Zibo_m, 2, argList{Yi32, Ymb}},
  407. }
  408. var ybtl = []ytab{
  409. {Zibo_m, 2, argList{Yi8, Yml}},
  410. {Zr_m, 1, argList{Yrl, Yml}},
  411. }
  412. var ymovw = []ytab{
  413. {Zr_m, 1, argList{Yrl, Yml}},
  414. {Zm_r, 1, argList{Yml, Yrl}},
  415. {Zil_rp, 1, argList{Yi32, Yrl}},
  416. {Zilo_m, 2, argList{Yi32, Yml}},
  417. {Zaut_r, 2, argList{Yiauto, Yrl}},
  418. }
  419. var ymovl = []ytab{
  420. {Zr_m, 1, argList{Yrl, Yml}},
  421. {Zm_r, 1, argList{Yml, Yrl}},
  422. {Zil_rp, 1, argList{Yi32, Yrl}},
  423. {Zilo_m, 2, argList{Yi32, Yml}},
  424. {Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
  425. {Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
  426. {Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
  427. {Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
  428. {Zaut_r, 2, argList{Yiauto, Yrl}},
  429. }
  430. var yret = []ytab{
  431. {Zo_iw, 1, argList{}},
  432. {Zo_iw, 1, argList{Yi32}},
  433. }
  434. var ymovq = []ytab{
  435. // valid in 32-bit mode
  436. {Zm_r_xm_nr, 1, argList{Ym, Ymr}}, // 0x6f MMX MOVQ (shorter encoding)
  437. {Zr_m_xm_nr, 1, argList{Ymr, Ym}}, // 0x7f MMX MOVQ
  438. {Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
  439. {Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
  440. {Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
  441. // valid only in 64-bit mode, usually with 64-bit prefix
  442. {Zr_m, 1, argList{Yrl, Yml}}, // 0x89
  443. {Zm_r, 1, argList{Yml, Yrl}}, // 0x8b
  444. {Zilo_m, 2, argList{Ys32, Yrl}}, // 32 bit signed 0xc7,(0)
  445. {Ziq_rp, 1, argList{Yi64, Yrl}}, // 0xb8 -- 32/64 bit immediate
  446. {Zilo_m, 2, argList{Yi32, Yml}}, // 0xc7,(0)
  447. {Zm_r_xm, 1, argList{Ymm, Ymr}}, // 0x6e MMX MOVD
  448. {Zr_m_xm, 1, argList{Ymr, Ymm}}, // 0x7e MMX MOVD
  449. {Zm_r_xm, 2, argList{Yml, Yxr}}, // Pe, 0x6e MOVD xmm load
  450. {Zr_m_xm, 2, argList{Yxr, Yml}}, // Pe, 0x7e MOVD xmm store
  451. {Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
  452. }
  453. var ymovbe = []ytab{
  454. {Zlitm_r, 3, argList{Ym, Yrl}},
  455. {Zlitr_m, 3, argList{Yrl, Ym}},
  456. }
  457. var ym_rl = []ytab{
  458. {Zm_r, 1, argList{Ym, Yrl}},
  459. }
  460. var yrl_m = []ytab{
  461. {Zr_m, 1, argList{Yrl, Ym}},
  462. }
  463. var ymb_rl = []ytab{
  464. {Zmb_r, 1, argList{Ymb, Yrl}},
  465. }
  466. var yml_rl = []ytab{
  467. {Zm_r, 1, argList{Yml, Yrl}},
  468. }
  469. var yrl_ml = []ytab{
  470. {Zr_m, 1, argList{Yrl, Yml}},
  471. }
  472. var yml_mb = []ytab{
  473. {Zr_m, 1, argList{Yrb, Ymb}},
  474. {Zm_r, 1, argList{Ymb, Yrb}},
  475. }
  476. var yrb_mb = []ytab{
  477. {Zr_m, 1, argList{Yrb, Ymb}},
  478. }
  479. var yxchg = []ytab{
  480. {Z_rp, 1, argList{Yax, Yrl}},
  481. {Zrp_, 1, argList{Yrl, Yax}},
  482. {Zr_m, 1, argList{Yrl, Yml}},
  483. {Zm_r, 1, argList{Yml, Yrl}},
  484. }
  485. var ydivl = []ytab{
  486. {Zm_o, 2, argList{Yml}},
  487. }
  488. var ydivb = []ytab{
  489. {Zm_o, 2, argList{Ymb}},
  490. }
  491. var yimul = []ytab{
  492. {Zm_o, 2, argList{Yml}},
  493. {Zib_rr, 1, argList{Yi8, Yrl}},
  494. {Zil_rr, 1, argList{Yi32, Yrl}},
  495. {Zm_r, 2, argList{Yml, Yrl}},
  496. }
  497. var yimul3 = []ytab{
  498. {Zibm_r, 2, argList{Yi8, Yml, Yrl}},
  499. {Zibm_r, 2, argList{Yi32, Yml, Yrl}},
  500. }
  501. var ybyte = []ytab{
  502. {Zbyte, 1, argList{Yi64}},
  503. }
  504. var yin = []ytab{
  505. {Zib_, 1, argList{Yi32}},
  506. {Zlit, 1, argList{}},
  507. }
  508. var yint = []ytab{
  509. {Zib_, 1, argList{Yi32}},
  510. }
  511. var ypushl = []ytab{
  512. {Zrp_, 1, argList{Yrl}},
  513. {Zm_o, 2, argList{Ym}},
  514. {Zib_, 1, argList{Yi8}},
  515. {Zil_, 1, argList{Yi32}},
  516. }
  517. var ypopl = []ytab{
  518. {Z_rp, 1, argList{Yrl}},
  519. {Zo_m, 2, argList{Ym}},
  520. }
  521. var ywrfsbase = []ytab{
  522. {Zm_o, 2, argList{Yrl}},
  523. }
  524. var yrdrand = []ytab{
  525. {Zo_m, 2, argList{Yrl}},
  526. }
  527. var yclflush = []ytab{
  528. {Zo_m, 2, argList{Ym}},
  529. }
  530. var ybswap = []ytab{
  531. {Z_rp, 2, argList{Yrl}},
  532. }
  533. var yscond = []ytab{
  534. {Zo_m, 2, argList{Ymb}},
  535. }
  536. var yjcond = []ytab{
  537. {Zbr, 0, argList{Ybr}},
  538. {Zbr, 0, argList{Yi0, Ybr}},
  539. {Zbr, 1, argList{Yi1, Ybr}},
  540. }
  541. var yloop = []ytab{
  542. {Zloop, 1, argList{Ybr}},
  543. }
  544. var ycall = []ytab{
  545. {Zcallindreg, 0, argList{Yml}},
  546. {Zcallindreg, 2, argList{Yrx, Yrx}},
  547. {Zcallind, 2, argList{Yindir}},
  548. {Zcall, 0, argList{Ybr}},
  549. {Zcallcon, 1, argList{Yi32}},
  550. }
  551. var yduff = []ytab{
  552. {Zcallduff, 1, argList{Yi32}},
  553. }
  554. var yjmp = []ytab{
  555. {Zo_m64, 2, argList{Yml}},
  556. {Zjmp, 0, argList{Ybr}},
  557. {Zjmpcon, 1, argList{Yi32}},
  558. }
  559. var yfmvd = []ytab{
  560. {Zm_o, 2, argList{Ym, Yf0}},
  561. {Zo_m, 2, argList{Yf0, Ym}},
  562. {Zm_o, 2, argList{Yrf, Yf0}},
  563. {Zo_m, 2, argList{Yf0, Yrf}},
  564. }
  565. var yfmvdp = []ytab{
  566. {Zo_m, 2, argList{Yf0, Ym}},
  567. {Zo_m, 2, argList{Yf0, Yrf}},
  568. }
  569. var yfmvf = []ytab{
  570. {Zm_o, 2, argList{Ym, Yf0}},
  571. {Zo_m, 2, argList{Yf0, Ym}},
  572. }
  573. var yfmvx = []ytab{
  574. {Zm_o, 2, argList{Ym, Yf0}},
  575. }
  576. var yfmvp = []ytab{
  577. {Zo_m, 2, argList{Yf0, Ym}},
  578. }
  579. var yfcmv = []ytab{
  580. {Zm_o, 2, argList{Yrf, Yf0}},
  581. }
  582. var yfadd = []ytab{
  583. {Zm_o, 2, argList{Ym, Yf0}},
  584. {Zm_o, 2, argList{Yrf, Yf0}},
  585. {Zo_m, 2, argList{Yf0, Yrf}},
  586. }
  587. var yfxch = []ytab{
  588. {Zo_m, 2, argList{Yf0, Yrf}},
  589. {Zm_o, 2, argList{Yrf, Yf0}},
  590. }
  591. var ycompp = []ytab{
  592. {Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
  593. }
  594. var ystsw = []ytab{
  595. {Zo_m, 2, argList{Ym}},
  596. {Zlit, 1, argList{Yax}},
  597. }
  598. var ysvrs_mo = []ytab{
  599. {Zm_o, 2, argList{Ym}},
  600. }
  601. // unaryDst version of "ysvrs_mo".
  602. var ysvrs_om = []ytab{
  603. {Zo_m, 2, argList{Ym}},
  604. }
  605. var ymm = []ytab{
  606. {Zm_r_xm, 1, argList{Ymm, Ymr}},
  607. {Zm_r_xm, 2, argList{Yxm, Yxr}},
  608. }
  609. var yxm = []ytab{
  610. {Zm_r_xm, 1, argList{Yxm, Yxr}},
  611. }
  612. var yxm_q4 = []ytab{
  613. {Zm_r, 1, argList{Yxm, Yxr}},
  614. }
  615. var yxcvm1 = []ytab{
  616. {Zm_r_xm, 2, argList{Yxm, Yxr}},
  617. {Zm_r_xm, 2, argList{Yxm, Ymr}},
  618. }
  619. var yxcvm2 = []ytab{
  620. {Zm_r_xm, 2, argList{Yxm, Yxr}},
  621. {Zm_r_xm, 2, argList{Ymm, Yxr}},
  622. }
  623. var yxr = []ytab{
  624. {Zm_r_xm, 1, argList{Yxr, Yxr}},
  625. }
  626. var yxr_ml = []ytab{
  627. {Zr_m_xm, 1, argList{Yxr, Yml}},
  628. }
  629. var ymr = []ytab{
  630. {Zm_r, 1, argList{Ymr, Ymr}},
  631. }
  632. var ymr_ml = []ytab{
  633. {Zr_m_xm, 1, argList{Ymr, Yml}},
  634. }
  635. var yxcmpi = []ytab{
  636. {Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
  637. }
  638. var yxmov = []ytab{
  639. {Zm_r_xm, 1, argList{Yxm, Yxr}},
  640. {Zr_m_xm, 1, argList{Yxr, Yxm}},
  641. }
  642. var yxcvfl = []ytab{
  643. {Zm_r_xm, 1, argList{Yxm, Yrl}},
  644. }
  645. var yxcvlf = []ytab{
  646. {Zm_r_xm, 1, argList{Yml, Yxr}},
  647. }
  648. var yxcvfq = []ytab{
  649. {Zm_r_xm, 2, argList{Yxm, Yrl}},
  650. }
  651. var yxcvqf = []ytab{
  652. {Zm_r_xm, 2, argList{Yml, Yxr}},
  653. }
  654. var yps = []ytab{
  655. {Zm_r_xm, 1, argList{Ymm, Ymr}},
  656. {Zibo_m_xm, 2, argList{Yi8, Ymr}},
  657. {Zm_r_xm, 2, argList{Yxm, Yxr}},
  658. {Zibo_m_xm, 3, argList{Yi8, Yxr}},
  659. }
  660. var yxrrl = []ytab{
  661. {Zm_r, 1, argList{Yxr, Yrl}},
  662. }
  663. var ymrxr = []ytab{
  664. {Zm_r, 1, argList{Ymr, Yxr}},
  665. {Zm_r_xm, 1, argList{Yxm, Yxr}},
  666. }
  667. var ymshuf = []ytab{
  668. {Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
  669. }
  670. var ymshufb = []ytab{
  671. {Zm2_r, 2, argList{Yxm, Yxr}},
  672. }
  673. // It should never have more than 1 entry,
  674. // because some optab entries you opcode secuences that
  675. // are longer than 2 bytes (zoffset=2 here),
  676. // ROUNDPD and ROUNDPS and recently added BLENDPD,
  677. // to name a few.
  678. var yxshuf = []ytab{
  679. {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
  680. }
  681. var yextrw = []ytab{
  682. {Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
  683. {Zibr_m, 2, argList{Yu8, Yxr, Yml}},
  684. }
  685. var yextr = []ytab{
  686. {Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
  687. }
  688. var yinsrw = []ytab{
  689. {Zibm_r, 2, argList{Yu8, Yml, Yxr}},
  690. }
  691. var yinsr = []ytab{
  692. {Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
  693. }
  694. var ypsdq = []ytab{
  695. {Zibo_m, 2, argList{Yi8, Yxr}},
  696. }
  697. var ymskb = []ytab{
  698. {Zm_r_xm, 2, argList{Yxr, Yrl}},
  699. {Zm_r_xm, 1, argList{Ymr, Yrl}},
  700. }
  701. var ycrc32l = []ytab{
  702. {Zlitm_r, 0, argList{Yml, Yrl}},
  703. }
  704. var ycrc32b = []ytab{
  705. {Zlitm_r, 0, argList{Ymb, Yrl}},
  706. }
  707. var yprefetch = []ytab{
  708. {Zm_o, 2, argList{Ym}},
  709. }
  710. var yaes = []ytab{
  711. {Zlitm_r, 2, argList{Yxm, Yxr}},
  712. }
  713. var yxbegin = []ytab{
  714. {Zjmp, 1, argList{Ybr}},
  715. }
  716. var yxabort = []ytab{
  717. {Zib_, 1, argList{Yu8}},
  718. }
  719. var ylddqu = []ytab{
  720. {Zm_r, 1, argList{Ym, Yxr}},
  721. }
  722. var ypalignr = []ytab{
  723. {Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
  724. }
  725. var ysha256rnds2 = []ytab{
  726. {Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}},
  727. }
  728. var yblendvpd = []ytab{
  729. {Z_m_r, 1, argList{Yxr0, Yxm, Yxr}},
  730. }
  731. var ymmxmm0f38 = []ytab{
  732. {Zlitm_r, 3, argList{Ymm, Ymr}},
  733. {Zlitm_r, 5, argList{Yxm, Yxr}},
  734. }
  735. var yextractps = []ytab{
  736. {Zibr_m, 2, argList{Yu2, Yxr, Yml}},
  737. }
  738. var ysha1rnds4 = []ytab{
  739. {Zibm_r, 2, argList{Yu2, Yxm, Yxr}},
  740. }
  741. // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
  742. // ACRC32, and p.From and p.To as operands (obj.Addr). The linker scans optab
  743. // to find the entry with the given p.As and then looks through the ytable for
  // that instruction (the second field in the optab struct) for a line whose
  // argList values match the Ytypes of the p.From and p.To operands. The
  746. // function oclass computes the specific Ytype of an operand and then the set
  747. // of more general Ytypes that it satisfies is implied by the ycover table, set
  748. // up in instinit. For example, oclass distinguishes the constants 0 and 1
  749. // from the more general 8-bit constants, but instinit says
  750. //
  751. // ycover[Yi0*Ymax+Ys32] = 1
  752. // ycover[Yi1*Ymax+Ys32] = 1
  753. // ycover[Yi8*Ymax+Ys32] = 1
  754. //
  755. // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
  756. // if that's what an instruction can handle.
  757. //
  758. // In parallel with the scan through the ytable for the appropriate line, there
  759. // is a z pointer that starts out pointing at the strange magic byte list in
  // the Optab struct. With each step past a non-matching ytable line, z
  // advances by the 2nd entry in the line (the zoffset). When a matching line
  // is found, that z pointer has the extra data to use in laying down the
  // instruction bytes. The actual bytes laid down are a function of the 1st
  // entry in the line (that is, the Ztype) and the z bytes.
  //
  // For example, let's look at AADDL. The optab line says:
  // {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  //
  // and yaddl says
  // var yaddl = []ytab{
  // {Zibo_m, 2, argList{Yi8, Yml}},
  // {Zil_, 1, argList{Yi32, Yax}},
  // {Zilo_m, 2, argList{Yi32, Yml}},
  // {Zr_m, 1, argList{Yrl, Yml}},
  // {Zm_r, 1, argList{Yml, Yrl}},
  // }
  777. //
  778. // so there are 5 possible types of ADDL instruction that can be laid down, and
  779. // possible states used to lay them down (Ztype and z pointer, assuming z
  780. // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
  781. //
  782. // Yi8, Yml -> Zibo_m, z (0x83, 00)
  783. // Yi32, Yax -> Zil_, z+2 (0x05)
  784. // Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
  785. // Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
  786. // Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
  787. //
  788. // The Pconstant in the optab line controls the prefix bytes to emit. That's
  789. // relatively straightforward as this program goes.
  790. //
  791. // The switch on yt.zcase in doasm implements the various Z cases. Zibo_m, for
  792. // example, is an opcode byte (z[0]) then an asmando (which is some kind of
  793. // encoded addressing mode for the Yml arg), and then a single immediate byte.
  794. // Zilo_m is the same but a long (32-bit) immediate.
  795. var optab =
  796. // as, ytab, andproto, opcode
  797. [...]Optab{
  798. {obj.AXXX, nil, 0, opBytes{}},
  799. {AAAA, ynone, P32, opBytes{0x37}},
  800. {AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
  801. {AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
  802. {AAAS, ynone, P32, opBytes{0x3f}},
  803. {AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
  804. {AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  805. {AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  806. {AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
  807. {AADCXL, yml_rl, Pq4, opBytes{0xf6}},
  808. {AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
  809. {AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
  810. {AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  811. {AADDPD, yxm, Pq, opBytes{0x58}},
  812. {AADDPS, yxm, Pm, opBytes{0x58}},
  813. {AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  814. {AADDSD, yxm, Pf2, opBytes{0x58}},
  815. {AADDSS, yxm, Pf3, opBytes{0x58}},
  816. {AADDSUBPD, yxm, Pq, opBytes{0xd0}},
  817. {AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
  818. {AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
  819. {AADOXL, yml_rl, Pq5, opBytes{0xf6}},
  820. {AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
  821. {AADJSP, nil, 0, opBytes{}},
  822. {AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
  823. {AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  824. {AANDNPD, yxm, Pq, opBytes{0x55}},
  825. {AANDNPS, yxm, Pm, opBytes{0x55}},
  826. {AANDPD, yxm, Pq, opBytes{0x54}},
  827. {AANDPS, yxm, Pm, opBytes{0x54}},
  828. {AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  829. {AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
  830. {AARPL, yrl_ml, P32, opBytes{0x63}},
  831. {ABOUNDL, yrl_m, P32, opBytes{0x62}},
  832. {ABOUNDW, yrl_m, Pe, opBytes{0x62}},
  833. {ABSFL, yml_rl, Pm, opBytes{0xbc}},
  834. {ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
  835. {ABSFW, yml_rl, Pq, opBytes{0xbc}},
  836. {ABSRL, yml_rl, Pm, opBytes{0xbd}},
  837. {ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
  838. {ABSRW, yml_rl, Pq, opBytes{0xbd}},
  839. {ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
  840. {ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
  841. {ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
  842. {ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
  843. {ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
  844. {ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
  845. {ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
  846. {ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
  847. {ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
  848. {ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
  849. {ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
  850. {ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
  851. {ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
  852. {ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
  853. {ABYTE, ybyte, Px, opBytes{1}},
  854. {obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
  855. {ACBW, ynone, Pe, opBytes{0x98}},
  856. {ACDQ, ynone, Px, opBytes{0x99}},
  857. {ACDQE, ynone, Pw, opBytes{0x98}},
  858. {ACLAC, ynone, Pm, opBytes{01, 0xca}},
  859. {ACLC, ynone, Px, opBytes{0xf8}},
  860. {ACLD, ynone, Px, opBytes{0xfc}},
  861. {ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
  862. {ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
  863. {ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
  864. {ACLI, ynone, Px, opBytes{0xfa}},
  865. {ACLTS, ynone, Pm, opBytes{0x06}},
  866. {ACLWB, yclflush, Pq, opBytes{0xae, 06}},
  867. {ACMC, ynone, Px, opBytes{0xf5}},
  868. {ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
  869. {ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
  870. {ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
  871. {ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
  872. {ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
  873. {ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  874. {ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  875. {ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  876. {ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  877. {ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  878. {ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  879. {ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  880. {ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  881. {ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  882. {ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  883. {ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  884. {ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  885. {ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  886. {ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  887. {ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  888. {ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  889. {ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  890. {ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  891. {ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  892. {ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  893. {ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  894. {ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  895. {ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  896. {ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  897. {ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  898. {ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  899. {ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  900. {ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  901. {ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  902. {ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  903. {ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  904. {ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  905. {ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  906. {ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  907. {ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  908. {ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  909. {ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  910. {ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  911. {ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  912. {ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  913. {ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  914. {ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  915. {ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  916. {ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  917. {ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  918. {ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  919. {ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  920. {ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  921. {ACMPSB, ynone, Pb, opBytes{0xa6}},
  922. {ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  923. {ACMPSL, ynone, Px, opBytes{0xa7}},
  924. {ACMPSQ, ynone, Pw, opBytes{0xa7}},
  925. {ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  926. {ACMPSW, ynone, Pe, opBytes{0xa7}},
  927. {ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  928. {ACOMISD, yxm, Pe, opBytes{0x2f}},
  929. {ACOMISS, yxm, Pm, opBytes{0x2f}},
  930. {ACPUID, ynone, Pm, opBytes{0xa2}},
  931. {ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  932. {ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  933. {ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  934. {ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  935. {ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  936. {ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  937. {ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  938. {ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  939. {ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  940. {ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  941. {ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  942. {ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  943. {ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  944. {ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  945. {ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  946. {ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  947. {ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  948. {ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  949. {ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  950. {ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  951. {ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  952. {ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  953. {ACWD, ynone, Pe, opBytes{0x99}},
  954. {ACWDE, ynone, Px, opBytes{0x98}},
  955. {ACQO, ynone, Pw, opBytes{0x99}},
  956. {ADAA, ynone, P32, opBytes{0x27}},
  957. {ADAS, ynone, P32, opBytes{0x2f}},
  958. {ADECB, yscond, Pb, opBytes{0xfe, 01}},
  959. {ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  960. {ADECQ, yincq, Pw, opBytes{0xff, 01}},
  961. {ADECW, yincq, Pe, opBytes{0xff, 01}},
  962. {ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  963. {ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  964. {ADIVPD, yxm, Pe, opBytes{0x5e}},
  965. {ADIVPS, yxm, Pm, opBytes{0x5e}},
  966. {ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  967. {ADIVSD, yxm, Pf2, opBytes{0x5e}},
  968. {ADIVSS, yxm, Pf3, opBytes{0x5e}},
  969. {ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  970. {ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  971. {ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  972. {AEMMS, ynone, Pm, opBytes{0x77}},
  973. {AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  974. {AENTER, nil, 0, opBytes{}}, // botch
  975. {AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  976. {AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  977. {AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  978. {AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  979. {AHLT, ynone, Px, opBytes{0xf4}},
  980. {AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  981. {AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  982. {AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  983. {AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  984. {AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  985. {AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  986. {AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  987. {AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  988. {AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  989. {AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  990. {AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  991. {AINB, yin, Pb, opBytes{0xe4, 0xec}},
  992. {AINW, yin, Pe, opBytes{0xe5, 0xed}},
  993. {AINL, yin, Px, opBytes{0xe5, 0xed}},
  994. {AINCB, yscond, Pb, opBytes{0xfe, 00}},
  995. {AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  996. {AINCQ, yincq, Pw, opBytes{0xff, 00}},
  997. {AINCW, yincq, Pe, opBytes{0xff, 00}},
  998. {AINSB, ynone, Pb, opBytes{0x6c}},
  999. {AINSL, ynone, Px, opBytes{0x6d}},
  1000. {AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1001. {AINSW, ynone, Pe, opBytes{0x6d}},
  1002. {AICEBP, ynone, Px, opBytes{0xf1}},
  1003. {AINT, yint, Px, opBytes{0xcd}},
  1004. {AINTO, ynone, P32, opBytes{0xce}},
  1005. {AIRETL, ynone, Px, opBytes{0xcf}},
  1006. {AIRETQ, ynone, Pw, opBytes{0xcf}},
  1007. {AIRETW, ynone, Pe, opBytes{0xcf}},
  1008. {AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1009. {AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1010. {AJCXZL, yloop, Px, opBytes{0xe3}},
  1011. {AJCXZW, yloop, Px, opBytes{0xe3}},
  1012. {AJCXZQ, yloop, Px, opBytes{0xe3}},
  1013. {AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1014. {AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1015. {AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1016. {AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1017. {AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1018. {AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1019. {AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1020. {AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1021. {obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1022. {AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1023. {AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1024. {AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1025. {AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1026. {AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1027. {AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1028. {AHADDPD, yxm, Pq, opBytes{0x7c}},
  1029. {AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1030. {AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1031. {AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1032. {ALAHF, ynone, Px, opBytes{0x9f}},
  1033. {ALARL, yml_rl, Pm, opBytes{0x02}},
  1034. {ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1035. {ALARW, yml_rl, Pq, opBytes{0x02}},
  1036. {ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1037. {ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1038. {ALEAL, ym_rl, Px, opBytes{0x8d}},
  1039. {ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1040. {ALEAVEL, ynone, P32, opBytes{0xc9}},
  1041. {ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1042. {ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1043. {ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1044. {ALOCK, ynone, Px, opBytes{0xf0}},
  1045. {ALODSB, ynone, Pb, opBytes{0xac}},
  1046. {ALODSL, ynone, Px, opBytes{0xad}},
  1047. {ALODSQ, ynone, Pw, opBytes{0xad}},
  1048. {ALODSW, ynone, Pe, opBytes{0xad}},
  1049. {ALONG, ybyte, Px, opBytes{4}},
  1050. {ALOOP, yloop, Px, opBytes{0xe2}},
  1051. {ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1052. {ALOOPNE, yloop, Px, opBytes{0xe0}},
  1053. {ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1054. {ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1055. {ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1056. {ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1057. {ALSLL, yml_rl, Pm, opBytes{0x03}},
  1058. {ALSLW, yml_rl, Pq, opBytes{0x03}},
  1059. {ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1060. {AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1061. {AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1062. {AMAXPD, yxm, Pe, opBytes{0x5f}},
  1063. {AMAXPS, yxm, Pm, opBytes{0x5f}},
  1064. {AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1065. {AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1066. {AMINPD, yxm, Pe, opBytes{0x5d}},
  1067. {AMINPS, yxm, Pm, opBytes{0x5d}},
  1068. {AMINSD, yxm, Pf2, opBytes{0x5d}},
  1069. {AMINSS, yxm, Pf3, opBytes{0x5d}},
  1070. {AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1071. {AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1072. {AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1073. {AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1074. {AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1075. {AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1076. {AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1077. {AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1078. {AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1079. {AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1080. {AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1081. {AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1082. {AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1083. {AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1084. {AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1085. {AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1086. {AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1087. {AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1088. {AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1089. {AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1090. {AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1091. {AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1092. {AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1093. {AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1094. {AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1095. {AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1096. {AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1097. {AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1098. {AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1099. {AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1100. {AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1101. {AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1102. {AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1103. {AMOVSB, ynone, Pb, opBytes{0xa4}},
  1104. {AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1105. {AMOVSL, ynone, Px, opBytes{0xa5}},
  1106. {AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1107. {AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1108. {AMOVSW, ynone, Pe, opBytes{0xa5}},
  1109. {AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1110. {AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1111. {AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1112. {AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1113. {AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1114. {AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1115. {AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1116. {AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1117. {AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1118. {AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1119. {AMULPD, yxm, Pe, opBytes{0x59}},
  1120. {AMULPS, yxm, Ym, opBytes{0x59}},
  1121. {AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1122. {AMULSD, yxm, Pf2, opBytes{0x59}},
  1123. {AMULSS, yxm, Pf3, opBytes{0x59}},
  1124. {AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1125. {ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1126. {ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1127. {ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1128. {ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1129. {obj.ANOP, ynop, Px, opBytes{0, 0}},
  1130. {ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1131. {ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1132. {ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1133. {ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1134. {AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1135. {AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1136. {AORPD, yxm, Pq, opBytes{0x56}},
  1137. {AORPS, yxm, Pm, opBytes{0x56}},
  1138. {AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1139. {AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1140. {AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1141. {AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1142. {AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1143. {AOUTSB, ynone, Pb, opBytes{0x6e}},
  1144. {AOUTSL, ynone, Px, opBytes{0x6f}},
  1145. {AOUTSW, ynone, Pe, opBytes{0x6f}},
  1146. {APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1147. {APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1148. {APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1149. {APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1150. {APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1151. {APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1152. {APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1153. {APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1154. {APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1155. {APADDQ, yxm, Pe, opBytes{0xd4}},
  1156. {APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1157. {APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1158. {APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1159. {APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1160. {APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1161. {APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1162. {APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1163. {APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1164. {APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1165. {APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1166. {APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1167. {APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1168. {APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1169. {APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1170. {APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1171. {APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1172. {APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1173. {APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1174. {APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1175. {APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1176. {APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1177. {APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1178. {APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1179. {APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1180. {APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1181. {APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1182. {APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1183. {APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1184. {APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1185. {APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1186. {APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1187. {APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1188. {APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1189. {APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1190. {APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1191. {APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1192. {APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1193. {APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1194. {APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1195. {APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1196. {APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1197. {APMAXSW, yxm, Pe, opBytes{0xee}},
  1198. {APMAXUB, yxm, Pe, opBytes{0xde}},
  1199. {APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1200. {APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1201. {APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1202. {APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1203. {APMINSW, yxm, Pe, opBytes{0xea}},
  1204. {APMINUB, yxm, Pe, opBytes{0xda}},
  1205. {APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1206. {APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1207. {APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1208. {APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1209. {APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1210. {APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1211. {APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1212. {APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1213. {APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1214. {APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1215. {APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1216. {APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1217. {APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1218. {APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1219. {APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1220. {APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1221. {APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1222. {APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1223. {APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1224. {APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1225. {APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1226. {APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1227. {APOPAL, ynone, P32, opBytes{0x61}},
  1228. {APOPAW, ynone, Pe, opBytes{0x61}},
  1229. {APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1230. {APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1231. {APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1232. {APOPFL, ynone, P32, opBytes{0x9d}},
  1233. {APOPFQ, ynone, Py, opBytes{0x9d}},
  1234. {APOPFW, ynone, Pe, opBytes{0x9d}},
  1235. {APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1236. {APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1237. {APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1238. {APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1239. {APSADBW, yxm, Pq, opBytes{0xf6}},
  1240. {APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1241. {APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1242. {APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1243. {APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1244. {APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1245. {APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1246. {APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1247. {APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1248. {APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1249. {APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1250. {APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1251. {APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1252. {APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1253. {APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1254. {APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1255. {APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1256. {APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1257. {APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1258. {APSUBB, yxm, Pe, opBytes{0xf8}},
  1259. {APSUBL, yxm, Pe, opBytes{0xfa}},
  1260. {APSUBQ, yxm, Pe, opBytes{0xfb}},
  1261. {APSUBSB, yxm, Pe, opBytes{0xe8}},
  1262. {APSUBSW, yxm, Pe, opBytes{0xe9}},
  1263. {APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1264. {APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1265. {APSUBW, yxm, Pe, opBytes{0xf9}},
  1266. {APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1267. {APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1268. {APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1269. {APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1270. {APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1271. {APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1272. {APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1273. {APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1274. {APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1275. {APUSHAL, ynone, P32, opBytes{0x60}},
  1276. {APUSHAW, ynone, Pe, opBytes{0x60}},
  1277. {APUSHFL, ynone, P32, opBytes{0x9c}},
  1278. {APUSHFQ, ynone, Py, opBytes{0x9c}},
  1279. {APUSHFW, ynone, Pe, opBytes{0x9c}},
  1280. {APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1281. {APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1282. {APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1283. {APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1284. {AQUAD, ybyte, Px, opBytes{8}},
  1285. {ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1286. {ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1287. {ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1288. {ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1289. {ARCPPS, yxm, Pm, opBytes{0x53}},
  1290. {ARCPSS, yxm, Pf3, opBytes{0x53}},
  1291. {ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1292. {ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1293. {ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1294. {ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1295. {AREP, ynone, Px, opBytes{0xf3}},
  1296. {AREPN, ynone, Px, opBytes{0xf2}},
  1297. {obj.ARET, ynone, Px, opBytes{0xc3}},
  1298. {ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1299. {ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1300. {ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1301. {AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1302. {AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1303. {AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1304. {AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1305. {ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1306. {ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1307. {ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1308. {ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1309. {ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1310. {ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1311. {ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1312. {ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1313. {ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1314. {ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1315. {ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1316. {ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1317. {ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1318. {ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1319. {ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1320. {ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1321. {ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1322. {ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1323. {ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1324. {ASCASB, ynone, Pb, opBytes{0xae}},
  1325. {ASCASL, ynone, Px, opBytes{0xaf}},
  1326. {ASCASQ, ynone, Pw, opBytes{0xaf}},
  1327. {ASCASW, ynone, Pe, opBytes{0xaf}},
  1328. {ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1329. {ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1330. {ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1331. {ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1332. {ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1333. {ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1334. {ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1335. {ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1336. {ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1337. {ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1338. {ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1339. {ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1340. {ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1341. {ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1342. {ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1343. {ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1344. {ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1345. {ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1346. {ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1347. {ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1348. {ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1349. {ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1350. {ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1351. {ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1352. {ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1353. {ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1354. {ASQRTPD, yxm, Pe, opBytes{0x51}},
  1355. {ASQRTPS, yxm, Pm, opBytes{0x51}},
  1356. {ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1357. {ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1358. {ASTC, ynone, Px, opBytes{0xf9}},
  1359. {ASTD, ynone, Px, opBytes{0xfd}},
  1360. {ASTI, ynone, Px, opBytes{0xfb}},
  1361. {ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1362. {ASTOSB, ynone, Pb, opBytes{0xaa}},
  1363. {ASTOSL, ynone, Px, opBytes{0xab}},
  1364. {ASTOSQ, ynone, Pw, opBytes{0xab}},
  1365. {ASTOSW, ynone, Pe, opBytes{0xab}},
  1366. {ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1367. {ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1368. {ASUBPD, yxm, Pe, opBytes{0x5c}},
  1369. {ASUBPS, yxm, Pm, opBytes{0x5c}},
  1370. {ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1371. {ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1372. {ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1373. {ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1374. {ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1375. {ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1376. {ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1377. {ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1378. {ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1379. {ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1380. {ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1381. {obj.ATEXT, ytext, Px, opBytes{}},
  1382. {AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1383. {AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1384. {AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1385. {AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1386. {AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1387. {AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1388. {AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1389. {AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1390. {AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1391. {AWAIT, ynone, Px, opBytes{0x9b}},
  1392. {AWORD, ybyte, Px, opBytes{2}},
  1393. {AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1394. {AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1395. {AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1396. {AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1397. {AXLAT, ynone, Px, opBytes{0xd7}},
  1398. {AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1399. {AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1400. {AXORPD, yxm, Pe, opBytes{0x57}},
  1401. {AXORPS, yxm, Pm, opBytes{0x57}},
  1402. {AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1403. {AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1404. {AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1405. {AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1406. {AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1407. {AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1408. {AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1409. {AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1410. {AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1411. {AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1412. {AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1413. {AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1414. {AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1415. {AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1416. {AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1417. {AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1418. {AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1419. {AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1420. {AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1421. {AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1422. {AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1423. {AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1424. {AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1425. {AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1426. {AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1427. {AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1428. {AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1429. {AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1430. {AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1431. {AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1432. {AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}}, // botch
  1433. {AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1434. {AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1435. {AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1436. {AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1437. {AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1438. {AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1439. {AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1440. {AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1441. {AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1442. {AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1443. {AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1444. {AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1445. {AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1446. {AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1447. {AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1448. {AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1449. {AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1450. {AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1451. {AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1452. {AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1453. {AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1454. {AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1455. {AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1456. {AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1457. {AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1458. {AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1459. {AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1460. {AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1461. {AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1462. {AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1463. {AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1464. {AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1465. {AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1466. {AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1467. {AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1468. {AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1469. {AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1470. {AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1471. {AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1472. {AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1473. {AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1474. {AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1475. {AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1476. {AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1477. {AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1478. {AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1479. {AFFREE, nil, 0, opBytes{}},
  1480. {AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1481. {AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1482. {AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1483. {AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1484. {AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1485. {AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1486. {AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1487. {AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1488. {AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1489. {AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1490. {AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1491. {AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1492. {AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1493. {AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1494. {AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1495. {AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1496. {AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1497. {AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1498. {AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1499. {AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1500. {AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1501. {AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1502. {AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1503. {AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1504. {AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1505. {AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1506. {AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1507. {AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1508. {AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1509. {AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1510. {AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1511. {AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1512. {AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1513. {AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1514. {AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1515. {AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1516. {AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1517. {AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1518. {AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1519. {ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1520. {ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1521. {ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1522. {ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1523. {ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1524. {ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1525. {AINVD, ynone, Pm, opBytes{0x08}},
  1526. {AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1527. {AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1528. {ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1529. {AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1530. {AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1531. {AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1532. {ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1533. {ARDMSR, ynone, Pm, opBytes{0x32}},
  1534. {ARDPMC, ynone, Pm, opBytes{0x33}},
  1535. {ARDTSC, ynone, Pm, opBytes{0x31}},
  1536. {ARSM, ynone, Pm, opBytes{0xaa}},
  1537. {ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1538. {ASYSRET, ynone, Pm, opBytes{0x07}},
  1539. {AWBINVD, ynone, Pm, opBytes{0x09}},
  1540. {AWRMSR, ynone, Pm, opBytes{0x30}},
  1541. {AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1542. {AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1543. {AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1544. {AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1545. {AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1546. {ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1547. {ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1548. {ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1549. {ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1550. {APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1551. {APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1552. {APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1553. {APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1554. {AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1555. {obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1556. {AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1557. {AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1558. {AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1559. {AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1560. {AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1561. {AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1562. {AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1563. {AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1564. {AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1565. {AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1566. {APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1567. {APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1568. {APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1569. {APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1570. {AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1571. {AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1572. {AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1573. {ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1574. {ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1575. {AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1576. {AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1577. {AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1578. {ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1579. {ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1580. {ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1581. {ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1582. {ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1583. {ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1584. {ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1585. {ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1586. {ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1587. {ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1588. {ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1589. {AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1590. {AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1591. {AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1592. {AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1593. {AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1594. {AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1595. {AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1596. {AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1597. {AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1598. {AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1599. {AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1600. {AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1601. {ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1602. {ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1603. {ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1604. {ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1605. {ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1606. {ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1607. {ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1608. {ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1609. {ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1610. {ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1611. {ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1612. {AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1613. {AMOVBEWW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1614. {AMOVBELL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1615. {AMOVBEQQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1616. {ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1617. {ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1618. {ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1619. {ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1620. {ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1621. {ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1622. {ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1623. {ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1624. {ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1625. {ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1626. {APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1627. {ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1628. {ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1629. {ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1630. {ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1631. {ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1632. {ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1633. {ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1634. {ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1635. {ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1636. {ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1637. {ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1638. {AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1639. {AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1640. {AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1641. {AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1642. {ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1643. {ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1644. {ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1645. {ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1646. {ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1647. {ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1648. {ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1649. {ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1650. {ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1651. {ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1652. {ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1653. {AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1654. {AXRELEASE, ynone, Px, opBytes{0xf3}},
  1655. {AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1656. {AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1657. {AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1658. {AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1659. {AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1660. {obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1661. {obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1662. {obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1663. {obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1664. {obj.AEND, nil, 0, opBytes{}},
  1665. {0, nil, 0, opBytes{}},
  1666. }
  1667. var opindex [(ALAST + 1) & obj.AMask]*Optab
  1668. // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1669. // This happens on systems like Solaris that call .so functions instead of system calls.
  1670. // It does not seem to be necessary for any other systems. This is probably working
  1671. // around a Solaris-specific bug that should be fixed differently, but we don't know
  1672. // what that bug is. And this does fix it.
  1673. func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1674. if ctxt.Headtype == objabi.Hsolaris {
  1675. // All the Solaris dynamic imports from libc.so begin with "libc_".
  1676. return strings.HasPrefix(s.Name, "libc_")
  1677. }
  1678. return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1679. }
  // nop holds single-instruction no-ops of various lengths: the element at
  // index i is a no-op encoding that is i+1 bytes long, and the unused tail of
  // each 16-byte array is zero.
  // The encodings were constructed by hand and disassembled with gdb to verify.
  // See http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  var nop = [][16]uint8{
  	0: {0x90},
  	1: {0x66, 0x90},
  	2: {0x0f, 0x1f, 0x00},
  	3: {0x0f, 0x1f, 0x40, 0x00},
  	4: {0x0f, 0x1f, 0x44, 0x00, 0x00},
  	5: {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
  	6: {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
  	7: {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  	8: {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  }
  1694. // Native Client rejects the repeated 0x66 prefix.
  1695. // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1696. func fillnop(p []byte, n int) {
  1697. var m int
  1698. for n > 0 {
  1699. m = n
  1700. if m > len(nop) {
  1701. m = len(nop)
  1702. }
  1703. copy(p[:m], nop[m-1][:m])
  1704. p = p[m:]
  1705. n -= m
  1706. }
  1707. }
  1708. func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1709. s.Grow(int64(c) + int64(pad))
  1710. fillnop(s.P[c:], int(pad))
  1711. return c + pad
  1712. }
  1713. func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1714. if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1715. return l
  1716. }
  1717. return q
  1718. }
  1719. // If the environment variable GOAMD64=alignedjumps the assembler will ensure that
  1720. // no standalone or macro-fused jump will straddle or end on a 32 byte boundary
  1721. // by inserting NOPs before the jumps
  1722. func isJump(p *obj.Prog) bool {
  1723. return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1724. p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1725. }
  1726. // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1727. // jump. Otherwise, nil is returned.
  1728. func lookForJCC(p *obj.Prog) *obj.Prog {
  1729. // Skip any PCDATA, FUNCDATA or NOP instructions
  1730. var q *obj.Prog
  1731. for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1732. }
  1733. if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1734. return nil
  1735. }
  1736. switch q.As {
  1737. case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1738. AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1739. default:
  1740. return nil
  1741. }
  1742. return q
  1743. }
  1744. // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1745. // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1746. // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1747. func fusedJump(p *obj.Prog) (bool, uint8) {
  1748. var fusedSize uint8
  1749. // The first instruction in a macro fused pair may be preceeded by the LOCK prefix,
  1750. // or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1751. // need to be careful to insert any padding before the locks rather than directly after them.
  1752. if p.As == AXRELEASE || p.As == AXACQUIRE {
  1753. fusedSize += p.Isize
  1754. for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1755. }
  1756. if p == nil {
  1757. return false, 0
  1758. }
  1759. }
  1760. if p.As == ALOCK {
  1761. fusedSize += p.Isize
  1762. for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1763. }
  1764. if p == nil {
  1765. return false, 0
  1766. }
  1767. }
  1768. cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1769. cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1770. p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1771. testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1772. p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1773. incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1774. p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1775. if !cmpAddSub && !testAnd && !incDec {
  1776. return false, 0
  1777. }
  1778. if !incDec {
  1779. var argOne obj.AddrType
  1780. var argTwo obj.AddrType
  1781. if cmp {
  1782. argOne = p.From.Type
  1783. argTwo = p.To.Type
  1784. } else {
  1785. argOne = p.To.Type
  1786. argTwo = p.From.Type
  1787. }
  1788. if argOne == obj.TYPE_REG {
  1789. if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1790. return false, 0
  1791. }
  1792. } else if argOne == obj.TYPE_MEM {
  1793. if argTwo != obj.TYPE_REG {
  1794. return false, 0
  1795. }
  1796. } else {
  1797. return false, 0
  1798. }
  1799. }
  1800. fusedSize += p.Isize
  1801. jmp := lookForJCC(p)
  1802. if jmp == nil {
  1803. return false, 0
  1804. }
  1805. fusedSize += jmp.Isize
  1806. if testAnd {
  1807. return true, fusedSize
  1808. }
  1809. if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1810. jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1811. return false, 0
  1812. }
  1813. if cmpAddSub {
  1814. return true, fusedSize
  1815. }
  1816. if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1817. return false, 0
  1818. }
  1819. return true, fusedSize
  1820. }
// padJumpsCtx carries the jump-padding configuration for one assembly.
// The value is the boundary, in bytes, that padJump keeps jumps from
// crossing or ending on; 0 disables jump padding entirely.
type padJumpsCtx int32
  1822. func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1823. // Disable jump padding on 32 bit builds by settting
  1824. // padJumps to 0.
  1825. if ctxt.Arch.Family == sys.I386 {
  1826. return padJumpsCtx(0)
  1827. }
  1828. // Disable jump padding for hand written assembly code.
  1829. if ctxt.IsAsm {
  1830. return padJumpsCtx(0)
  1831. }
  1832. if objabi.GOAMD64 != "alignedjumps" {
  1833. return padJumpsCtx(0)
  1834. }
  1835. return padJumpsCtx(32)
  1836. }
  1837. // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1838. // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1839. // not cross or end on a 32 byte boundary.
  1840. func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  1841. if pjc == 0 {
  1842. return c
  1843. }
  1844. var toPad int32
  1845. fj, fjSize := fusedJump(p)
  1846. mask := int32(pjc - 1)
  1847. if fj {
  1848. if (c&mask)+int32(fjSize) >= int32(pjc) {
  1849. toPad = int32(pjc) - (c & mask)
  1850. }
  1851. } else if isJump(p) {
  1852. if (c&mask)+int32(p.Isize) >= int32(pjc) {
  1853. toPad = int32(pjc) - (c & mask)
  1854. }
  1855. }
  1856. if toPad <= 0 {
  1857. return c
  1858. }
  1859. return noppad(ctxt, s, c, toPad)
  1860. }
  1861. // reAssemble is called if an instruction's size changes during assembly. If
  1862. // it does and the instruction is a standalone or a macro-fused jump we need to
  1863. // reassemble.
  1864. func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  1865. if pjc == 0 {
  1866. return false
  1867. }
  1868. fj, _ := fusedJump(p)
  1869. return fj || isJump(p)
  1870. }
// nopPad records a run of padding bytes emitted during one assembly pass so
// that span6 can later splice a matching NOP Prog into the instruction list.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad
}
// span6 assembles the function symbol s. It rewrites a few pseudo
// instructions, then repeatedly encodes every instruction of s.Func.Text into
// s.P (assigning Pc and Isize and patching forward branch displacements)
// until a pass completes without requesting re-assembly. It gives up with a
// fatal error after more than 20 passes. If s.P is already non-nil the symbol
// is left untouched. newprog is handed on to obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	pjc := makePjcCtx(ctxt)

	// Nothing to do if the symbol already has code bytes.
	if s.P != nil {
		return
	}

	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// First pass over the instructions: normalize branch targets, lower
	// ADJSP, and rewrite indirect calls/jumps when retpolines are requested.
	for p := s.Func.Text; p != nil; p = p.Link {
		// A branch without a target is made to point at itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			// Only register-indirect targets can be redirected; a memory
			// operand is reported as an error.
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			// Turn the indirect branch into a direct branch to the
			// runtime.retpoline<reg> symbol for the target register.
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func.Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort was visited earlier in
		// this loop, so the branch goes backwards and the target is a loop head.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current output offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard relocations and code bytes from the previous pass while
		// keeping the underlying storage.
		for i := range s.R {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func.Text; p != nil; p = p.Link {
			c0 := c // offset before any padding is added for p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the jump q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Does not fit a short branch: drop the short flag
						// on q and request another pass.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The one-byte displacement is at offset 2 for JCXZL and
					// XBEGIN and at offset 1 for the other short branches.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: the 32-bit displacement occupies the last
					// four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop retrying once this symbol has produced new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func.Text, newprog, useTLS, nil)
	}
}
  2046. func instinit(ctxt *obj.Link) {
  2047. if ycover[0] != 0 {
  2048. // Already initialized; stop now.
  2049. // This happens in the cmd/asm tests,
  2050. // each of which re-initializes the arch.
  2051. return
  2052. }
  2053. switch ctxt.Headtype {
  2054. case objabi.Hplan9:
  2055. plan9privates = ctxt.Lookup("_privates")
  2056. }
  2057. for i := range avxOptab {
  2058. c := avxOptab[i].as
  2059. if opindex[c&obj.AMask] != nil {
  2060. ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2061. }
  2062. opindex[c&obj.AMask] = &avxOptab[i]
  2063. }
  2064. for i := 1; optab[i].as != 0; i++ {
  2065. c := optab[i].as
  2066. if opindex[c&obj.AMask] != nil {
  2067. ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2068. }
  2069. opindex[c&obj.AMask] = &optab[i]
  2070. }
  2071. for i := 0; i < Ymax; i++ {
  2072. ycover[i*Ymax+i] = 1
  2073. }
  2074. ycover[Yi0*Ymax+Yu2] = 1
  2075. ycover[Yi1*Ymax+Yu2] = 1
  2076. ycover[Yi0*Ymax+Yi8] = 1
  2077. ycover[Yi1*Ymax+Yi8] = 1
  2078. ycover[Yu2*Ymax+Yi8] = 1
  2079. ycover[Yu7*Ymax+Yi8] = 1
  2080. ycover[Yi0*Ymax+Yu7] = 1
  2081. ycover[Yi1*Ymax+Yu7] = 1
  2082. ycover[Yu2*Ymax+Yu7] = 1
  2083. ycover[Yi0*Ymax+Yu8] = 1
  2084. ycover[Yi1*Ymax+Yu8] = 1
  2085. ycover[Yu2*Ymax+Yu8] = 1
  2086. ycover[Yu7*Ymax+Yu8] = 1
  2087. ycover[Yi0*Ymax+Ys32] = 1
  2088. ycover[Yi1*Ymax+Ys32] = 1
  2089. ycover[Yu2*Ymax+Ys32] = 1
  2090. ycover[Yu7*Ymax+Ys32] = 1
  2091. ycover[Yu8*Ymax+Ys32] = 1
  2092. ycover[Yi8*Ymax+Ys32] = 1
  2093. ycover[Yi0*Ymax+Yi32] = 1
  2094. ycover[Yi1*Ymax+Yi32] = 1
  2095. ycover[Yu2*Ymax+Yi32] = 1
  2096. ycover[Yu7*Ymax+Yi32] = 1
  2097. ycover[Yu8*Ymax+Yi32] = 1
  2098. ycover[Yi8*Ymax+Yi32] = 1
  2099. ycover[Ys32*Ymax+Yi32] = 1
  2100. ycover[Yi0*Ymax+Yi64] = 1
  2101. ycover[Yi1*Ymax+Yi64] = 1
  2102. ycover[Yu7*Ymax+Yi64] = 1
  2103. ycover[Yu2*Ymax+Yi64] = 1
  2104. ycover[Yu8*Ymax+Yi64] = 1
  2105. ycover[Yi8*Ymax+Yi64] = 1
  2106. ycover[Ys32*Ymax+Yi64] = 1
  2107. ycover[Yi32*Ymax+Yi64] = 1
  2108. ycover[Yal*Ymax+Yrb] = 1
  2109. ycover[Ycl*Ymax+Yrb] = 1
  2110. ycover[Yax*Ymax+Yrb] = 1
  2111. ycover[Ycx*Ymax+Yrb] = 1
  2112. ycover[Yrx*Ymax+Yrb] = 1
  2113. ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2114. ycover[Ycl*Ymax+Ycx] = 1
  2115. ycover[Yax*Ymax+Yrx] = 1
  2116. ycover[Ycx*Ymax+Yrx] = 1
  2117. ycover[Yax*Ymax+Yrl] = 1
  2118. ycover[Ycx*Ymax+Yrl] = 1
  2119. ycover[Yrx*Ymax+Yrl] = 1
  2120. ycover[Yrl32*Ymax+Yrl] = 1
  2121. ycover[Yf0*Ymax+Yrf] = 1
  2122. ycover[Yal*Ymax+Ymb] = 1
  2123. ycover[Ycl*Ymax+Ymb] = 1
  2124. ycover[Yax*Ymax+Ymb] = 1
  2125. ycover[Ycx*Ymax+Ymb] = 1
  2126. ycover[Yrx*Ymax+Ymb] = 1
  2127. ycover[Yrb*Ymax+Ymb] = 1
  2128. ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2129. ycover[Ym*Ymax+Ymb] = 1
  2130. ycover[Yax*Ymax+Yml] = 1
  2131. ycover[Ycx*Ymax+Yml] = 1
  2132. ycover[Yrx*Ymax+Yml] = 1
  2133. ycover[Yrl*Ymax+Yml] = 1
  2134. ycover[Yrl32*Ymax+Yml] = 1
  2135. ycover[Ym*Ymax+Yml] = 1
  2136. ycover[Yax*Ymax+Ymm] = 1
  2137. ycover[Ycx*Ymax+Ymm] = 1
  2138. ycover[Yrx*Ymax+Ymm] = 1
  2139. ycover[Yrl*Ymax+Ymm] = 1
  2140. ycover[Yrl32*Ymax+Ymm] = 1
  2141. ycover[Ym*Ymax+Ymm] = 1
  2142. ycover[Ymr*Ymax+Ymm] = 1
  2143. ycover[Yxr0*Ymax+Yxr] = 1
  2144. ycover[Ym*Ymax+Yxm] = 1
  2145. ycover[Yxr0*Ymax+Yxm] = 1
  2146. ycover[Yxr*Ymax+Yxm] = 1
  2147. ycover[Ym*Ymax+Yym] = 1
  2148. ycover[Yyr*Ymax+Yym] = 1
  2149. ycover[Yxr0*Ymax+YxrEvex] = 1
  2150. ycover[Yxr*Ymax+YxrEvex] = 1
  2151. ycover[Ym*Ymax+YxmEvex] = 1
  2152. ycover[Yxr0*Ymax+YxmEvex] = 1
  2153. ycover[Yxr*Ymax+YxmEvex] = 1
  2154. ycover[YxrEvex*Ymax+YxmEvex] = 1
  2155. ycover[Yyr*Ymax+YyrEvex] = 1
  2156. ycover[Ym*Ymax+YymEvex] = 1
  2157. ycover[Yyr*Ymax+YymEvex] = 1
  2158. ycover[YyrEvex*Ymax+YymEvex] = 1
  2159. ycover[Ym*Ymax+Yzm] = 1
  2160. ycover[Yzr*Ymax+Yzm] = 1
  2161. ycover[Yk0*Ymax+Yk] = 1
  2162. ycover[Yknot0*Ymax+Yk] = 1
  2163. ycover[Yk0*Ymax+Ykm] = 1
  2164. ycover[Yknot0*Ymax+Ykm] = 1
  2165. ycover[Yk*Ymax+Ykm] = 1
  2166. ycover[Ym*Ymax+Ykm] = 1
  2167. ycover[Yxvm*Ymax+YxvmEvex] = 1
  2168. ycover[Yyvm*Ymax+YyvmEvex] = 1
  2169. for i := 0; i < MAXREG; i++ {
  2170. reg[i] = -1
  2171. if i >= REG_AL && i <= REG_R15B {
  2172. reg[i] = (i - REG_AL) & 7
  2173. if i >= REG_SPB && i <= REG_DIB {
  2174. regrex[i] = 0x40
  2175. }
  2176. if i >= REG_R8B && i <= REG_R15B {
  2177. regrex[i] = Rxr | Rxx | Rxb
  2178. }
  2179. }
  2180. if i >= REG_AH && i <= REG_BH {
  2181. reg[i] = 4 + ((i - REG_AH) & 7)
  2182. }
  2183. if i >= REG_AX && i <= REG_R15 {
  2184. reg[i] = (i - REG_AX) & 7
  2185. if i >= REG_R8 {
  2186. regrex[i] = Rxr | Rxx | Rxb
  2187. }
  2188. }
  2189. if i >= REG_F0 && i <= REG_F0+7 {
  2190. reg[i] = (i - REG_F0) & 7
  2191. }
  2192. if i >= REG_M0 && i <= REG_M0+7 {
  2193. reg[i] = (i - REG_M0) & 7
  2194. }
  2195. if i >= REG_K0 && i <= REG_K0+7 {
  2196. reg[i] = (i - REG_K0) & 7
  2197. }
  2198. if i >= REG_X0 && i <= REG_X0+15 {
  2199. reg[i] = (i - REG_X0) & 7
  2200. if i >= REG_X0+8 {
  2201. regrex[i] = Rxr | Rxx | Rxb
  2202. }
  2203. }
  2204. if i >= REG_X16 && i <= REG_X16+15 {
  2205. reg[i] = (i - REG_X16) & 7
  2206. if i >= REG_X16+8 {
  2207. regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2208. } else {
  2209. regrex[i] = RxrEvex
  2210. }
  2211. }
  2212. if i >= REG_Y0 && i <= REG_Y0+15 {
  2213. reg[i] = (i - REG_Y0) & 7
  2214. if i >= REG_Y0+8 {
  2215. regrex[i] = Rxr | Rxx | Rxb
  2216. }
  2217. }
  2218. if i >= REG_Y16 && i <= REG_Y16+15 {
  2219. reg[i] = (i - REG_Y16) & 7
  2220. if i >= REG_Y16+8 {
  2221. regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2222. } else {
  2223. regrex[i] = RxrEvex
  2224. }
  2225. }
  2226. if i >= REG_Z0 && i <= REG_Z0+15 {
  2227. reg[i] = (i - REG_Z0) & 7
  2228. if i > REG_Z0+7 {
  2229. regrex[i] = Rxr | Rxx | Rxb
  2230. }
  2231. }
  2232. if i >= REG_Z16 && i <= REG_Z16+15 {
  2233. reg[i] = (i - REG_Z16) & 7
  2234. if i >= REG_Z16+8 {
  2235. regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2236. } else {
  2237. regrex[i] = RxrEvex
  2238. }
  2239. }
  2240. if i >= REG_CR+8 && i <= REG_CR+15 {
  2241. regrex[i] = Rxr
  2242. }
  2243. }
  2244. }
// isAndroid is true when objabi.GOOS is "android". prefixof consults it when
// choosing the segment prefix for TLS references.
var isAndroid = objabi.GOOS == "android"
  2246. func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2247. if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2248. return 0
  2249. }
  2250. if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2251. switch a.Reg {
  2252. case REG_CS:
  2253. return 0x2e
  2254. case REG_DS:
  2255. return 0x3e
  2256. case REG_ES:
  2257. return 0x26
  2258. case REG_FS:
  2259. return 0x64
  2260. case REG_GS:
  2261. return 0x65
  2262. case REG_TLS:
  2263. // NOTE: Systems listed here should be only systems that
  2264. // support direct TLS references like 8(TLS) implemented as
  2265. // direct references from FS or GS. Systems that require
  2266. // the initial-exec model, where you load the TLS base into
  2267. // a register and then index from that register, do not reach
  2268. // this code and should not be listed.
  2269. if ctxt.Arch.Family == sys.I386 {
  2270. switch ctxt.Headtype {
  2271. default:
  2272. if isAndroid {
  2273. return 0x65 // GS
  2274. }
  2275. log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2276. case objabi.Hdarwin,
  2277. objabi.Hdragonfly,
  2278. objabi.Hfreebsd,
  2279. objabi.Hnetbsd,
  2280. objabi.Hopenbsd:
  2281. return 0x65 // GS
  2282. }
  2283. }
  2284. switch ctxt.Headtype {
  2285. default:
  2286. log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2287. case objabi.Hlinux:
  2288. if isAndroid {
  2289. return 0x64 // FS
  2290. }
  2291. if ctxt.Flag_shared {
  2292. log.Fatalf("unknown TLS base register for linux with -shared")
  2293. } else {
  2294. return 0x64 // FS
  2295. }
  2296. case objabi.Hdragonfly,
  2297. objabi.Hfreebsd,
  2298. objabi.Hnetbsd,
  2299. objabi.Hopenbsd,
  2300. objabi.Hsolaris:
  2301. return 0x64 // FS
  2302. case objabi.Hdarwin:
  2303. return 0x65 // GS
  2304. }
  2305. }
  2306. }
  2307. if ctxt.Arch.Family == sys.I386 {
  2308. if a.Index == REG_TLS && ctxt.Flag_shared {
  2309. // When building for inclusion into a shared library, an instruction of the form
  2310. // MOVL off(CX)(TLS*1), AX
  2311. // becomes
  2312. // mov %gs:off(%ecx), %eax
  2313. // which assumes that the correct TLS offset has been loaded into %ecx (today
  2314. // there is only one TLS variable -- g -- so this is OK). When not building for
  2315. // a shared library the instruction it becomes
  2316. // mov 0x0(%ecx), %eax
  2317. // and a R_TLS_LE relocation, and so does not require a prefix.
  2318. return 0x65 // GS
  2319. }
  2320. return 0
  2321. }
  2322. switch a.Index {
  2323. case REG_CS:
  2324. return 0x2e
  2325. case REG_DS:
  2326. return 0x3e
  2327. case REG_ES:
  2328. return 0x26
  2329. case REG_TLS:
  2330. if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2331. // When building for inclusion into a shared library, an instruction of the form
  2332. // MOV off(CX)(TLS*1), AX
  2333. // becomes
  2334. // mov %fs:off(%rcx), %rax
  2335. // which assumes that the correct TLS offset has been loaded into %rcx (today
  2336. // there is only one TLS variable -- g -- so this is OK). When not building for
  2337. // a shared library the instruction does not require a prefix.
  2338. return 0x64
  2339. }
  2340. case REG_FS:
  2341. return 0x64
  2342. case REG_GS:
  2343. return 0x65
  2344. }
  2345. return 0
  2346. }
  2347. // oclassRegList returns multisource operand class for addr.
  2348. func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2349. // TODO(quasilyte): when oclass register case is refactored into
  2350. // lookup table, use it here to get register kind more easily.
  2351. // Helper functions like regIsXmm should go away too (they will become redundant).
  2352. regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2353. regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2354. regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2355. reg0, reg1 := decodeRegisterRange(addr.Offset)
  2356. low := regIndex(int16(reg0))
  2357. high := regIndex(int16(reg1))
  2358. if ctxt.Arch.Family == sys.I386 {
  2359. if low >= 8 || high >= 8 {
  2360. return Yxxx
  2361. }
  2362. }
  2363. switch high - low {
  2364. case 3:
  2365. switch {
  2366. case regIsXmm(reg0) && regIsXmm(reg1):
  2367. return YxrEvexMulti4
  2368. case regIsYmm(reg0) && regIsYmm(reg1):
  2369. return YyrEvexMulti4
  2370. case regIsZmm(reg0) && regIsZmm(reg1):
  2371. return YzrMulti4
  2372. default:
  2373. return Yxxx
  2374. }
  2375. default:
  2376. return Yxxx
  2377. }
  2378. }
  2379. // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2380. // For addr that is not V-mem returns (Yxxx, false).
  2381. func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2382. switch addr.Index {
  2383. case REG_X0 + 0,
  2384. REG_X0 + 1,
  2385. REG_X0 + 2,
  2386. REG_X0 + 3,
  2387. REG_X0 + 4,
  2388. REG_X0 + 5,
  2389. REG_X0 + 6,
  2390. REG_X0 + 7:
  2391. return Yxvm, true
  2392. case REG_X8 + 0,
  2393. REG_X8 + 1,
  2394. REG_X8 + 2,
  2395. REG_X8 + 3,
  2396. REG_X8 + 4,
  2397. REG_X8 + 5,
  2398. REG_X8 + 6,
  2399. REG_X8 + 7:
  2400. if ctxt.Arch.Family == sys.I386 {
  2401. return Yxxx, true
  2402. }
  2403. return Yxvm, true
  2404. case REG_X16 + 0,
  2405. REG_X16 + 1,
  2406. REG_X16 + 2,
  2407. REG_X16 + 3,
  2408. REG_X16 + 4,
  2409. REG_X16 + 5,
  2410. REG_X16 + 6,
  2411. REG_X16 + 7,
  2412. REG_X16 + 8,
  2413. REG_X16 + 9,
  2414. REG_X16 + 10,
  2415. REG_X16 + 11,
  2416. REG_X16 + 12,
  2417. REG_X16 + 13,
  2418. REG_X16 + 14,
  2419. REG_X16 + 15:
  2420. if ctxt.Arch.Family == sys.I386 {
  2421. return Yxxx, true
  2422. }
  2423. return YxvmEvex, true
  2424. case REG_Y0 + 0,
  2425. REG_Y0 + 1,
  2426. REG_Y0 + 2,
  2427. REG_Y0 + 3,
  2428. REG_Y0 + 4,
  2429. REG_Y0 + 5,
  2430. REG_Y0 + 6,
  2431. REG_Y0 + 7:
  2432. return Yyvm, true
  2433. case REG_Y8 + 0,
  2434. REG_Y8 + 1,
  2435. REG_Y8 + 2,
  2436. REG_Y8 + 3,
  2437. REG_Y8 + 4,
  2438. REG_Y8 + 5,
  2439. REG_Y8 + 6,
  2440. REG_Y8 + 7:
  2441. if ctxt.Arch.Family == sys.I386 {
  2442. return Yxxx, true
  2443. }
  2444. return Yyvm, true
  2445. case REG_Y16 + 0,
  2446. REG_Y16 + 1,
  2447. REG_Y16 + 2,
  2448. REG_Y16 + 3,
  2449. REG_Y16 + 4,
  2450. REG_Y16 + 5,
  2451. REG_Y16 + 6,
  2452. REG_Y16 + 7,
  2453. REG_Y16 + 8,
  2454. REG_Y16 + 9,
  2455. REG_Y16 + 10,
  2456. REG_Y16 + 11,
  2457. REG_Y16 + 12,
  2458. REG_Y16 + 13,
  2459. REG_Y16 + 14,
  2460. REG_Y16 + 15:
  2461. if ctxt.Arch.Family == sys.I386 {
  2462. return Yxxx, true
  2463. }
  2464. return YyvmEvex, true
  2465. case REG_Z0 + 0,
  2466. REG_Z0 + 1,
  2467. REG_Z0 + 2,
  2468. REG_Z0 + 3,
  2469. REG_Z0 + 4,
  2470. REG_Z0 + 5,
  2471. REG_Z0 + 6,
  2472. REG_Z0 + 7:
  2473. return Yzvm, true
  2474. case REG_Z8 + 0,
  2475. REG_Z8 + 1,
  2476. REG_Z8 + 2,
  2477. REG_Z8 + 3,
  2478. REG_Z8 + 4,
  2479. REG_Z8 + 5,
  2480. REG_Z8 + 6,
  2481. REG_Z8 + 7,
  2482. REG_Z8 + 8,
  2483. REG_Z8 + 9,
  2484. REG_Z8 + 10,
  2485. REG_Z8 + 11,
  2486. REG_Z8 + 12,
  2487. REG_Z8 + 13,
  2488. REG_Z8 + 14,
  2489. REG_Z8 + 15,
  2490. REG_Z8 + 16,
  2491. REG_Z8 + 17,
  2492. REG_Z8 + 18,
  2493. REG_Z8 + 19,
  2494. REG_Z8 + 20,
  2495. REG_Z8 + 21,
  2496. REG_Z8 + 22,
  2497. REG_Z8 + 23:
  2498. if ctxt.Arch.Family == sys.I386 {
  2499. return Yxxx, true
  2500. }
  2501. return Yzvm, true
  2502. }
  2503. return Yxxx, false
  2504. }
  2505. func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2506. switch a.Type {
  2507. case obj.TYPE_REGLIST:
  2508. return oclassRegList(ctxt, a)
  2509. case obj.TYPE_NONE:
  2510. return Ynone
  2511. case obj.TYPE_BRANCH:
  2512. return Ybr
  2513. case obj.TYPE_INDIR:
  2514. if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2515. return Yindir
  2516. }
  2517. return Yxxx
  2518. case obj.TYPE_MEM:
  2519. // Pseudo registers have negative index, but SP is
  2520. // not pseudo on x86, hence REG_SP check is not redundant.
  2521. if a.Index == REG_SP || a.Index < 0 {
  2522. // Can't use FP/SB/PC/SP as the index register.
  2523. return Yxxx
  2524. }
  2525. if vmem, ok := oclassVMem(ctxt, a); ok {
  2526. return vmem
  2527. }
  2528. if ctxt.Arch.Family == sys.AMD64 {
  2529. switch a.Name {
  2530. case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2531. // Global variables can't use index registers and their
  2532. // base register is %rip (%rip is encoded as REG_NONE).
  2533. if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2534. return Yxxx
  2535. }
  2536. case obj.NAME_AUTO, obj.NAME_PARAM:
  2537. // These names must have a base of SP. The old compiler
  2538. // uses 0 for the base register. SSA uses REG_SP.
  2539. if a.Reg != REG_SP && a.Reg != 0 {
  2540. return Yxxx
  2541. }
  2542. case obj.NAME_NONE:
  2543. // everything is ok
  2544. default:
  2545. // unknown name
  2546. return Yxxx
  2547. }
  2548. }
  2549. return Ym
  2550. case obj.TYPE_ADDR:
  2551. switch a.Name {
  2552. case obj.NAME_GOTREF:
  2553. ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2554. return Yxxx
  2555. case obj.NAME_EXTERN,
  2556. obj.NAME_STATIC:
  2557. if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2558. return Yi32
  2559. }
  2560. return Yiauto // use pc-relative addressing
  2561. case obj.NAME_AUTO,
  2562. obj.NAME_PARAM:
  2563. return Yiauto
  2564. }
  2565. // TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2566. // and got Yi32 in an earlier version of this code.
  2567. // Keep doing that until we fix yduff etc.
  2568. if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2569. return Yi32
  2570. }
  2571. if a.Sym != nil || a.Name != obj.NAME_NONE {
  2572. ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2573. }
  2574. fallthrough
  2575. case obj.TYPE_CONST:
  2576. if a.Sym != nil {
  2577. ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2578. }
  2579. v := a.Offset
  2580. if ctxt.Arch.Family == sys.I386 {
  2581. v = int64(int32(v))
  2582. }
  2583. switch {
  2584. case v == 0:
  2585. return Yi0
  2586. case v == 1:
  2587. return Yi1
  2588. case v >= 0 && v <= 3:
  2589. return Yu2
  2590. case v >= 0 && v <= 127:
  2591. return Yu7
  2592. case v >= 0 && v <= 255:
  2593. return Yu8
  2594. case v >= -128 && v <= 127:
  2595. return Yi8
  2596. }
  2597. if ctxt.Arch.Family == sys.I386 {
  2598. return Yi32
  2599. }
  2600. l := int32(v)
  2601. if int64(l) == v {
  2602. return Ys32 // can sign extend
  2603. }
  2604. if v>>32 == 0 {
  2605. return Yi32 // unsigned
  2606. }
  2607. return Yi64
  2608. case obj.TYPE_TEXTSIZE:
  2609. return Ytextsize
  2610. }
  2611. if a.Type != obj.TYPE_REG {
  2612. ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2613. return Yxxx
  2614. }
  2615. switch a.Reg {
  2616. case REG_AL:
  2617. return Yal
  2618. case REG_AX:
  2619. return Yax
  2620. /*
  2621. case REG_SPB:
  2622. */
  2623. case REG_BPB,
  2624. REG_SIB,
  2625. REG_DIB,
  2626. REG_R8B,
  2627. REG_R9B,
  2628. REG_R10B,
  2629. REG_R11B,
  2630. REG_R12B,
  2631. REG_R13B,
  2632. REG_R14B,
  2633. REG_R15B:
  2634. if ctxt.Arch.Family == sys.I386 {
  2635. return Yxxx
  2636. }
  2637. fallthrough
  2638. case REG_DL,
  2639. REG_BL,
  2640. REG_AH,
  2641. REG_CH,
  2642. REG_DH,
  2643. REG_BH:
  2644. return Yrb
  2645. case REG_CL:
  2646. return Ycl
  2647. case REG_CX:
  2648. return Ycx
  2649. case REG_DX, REG_BX:
  2650. return Yrx
  2651. case REG_R8, // not really Yrl
  2652. REG_R9,
  2653. REG_R10,
  2654. REG_R11,
  2655. REG_R12,
  2656. REG_R13,
  2657. REG_R14,
  2658. REG_R15:
  2659. if ctxt.Arch.Family == sys.I386 {
  2660. return Yxxx
  2661. }
  2662. fallthrough
  2663. case REG_SP, REG_BP, REG_SI, REG_DI:
  2664. if ctxt.Arch.Family == sys.I386 {
  2665. return Yrl32
  2666. }
  2667. return Yrl
  2668. case REG_F0 + 0:
  2669. return Yf0
  2670. case REG_F0 + 1,
  2671. REG_F0 + 2,
  2672. REG_F0 + 3,
  2673. REG_F0 + 4,
  2674. REG_F0 + 5,
  2675. REG_F0 + 6,
  2676. REG_F0 + 7:
  2677. return Yrf
  2678. case REG_M0 + 0,
  2679. REG_M0 + 1,
  2680. REG_M0 + 2,
  2681. REG_M0 + 3,
  2682. REG_M0 + 4,
  2683. REG_M0 + 5,
  2684. REG_M0 + 6,
  2685. REG_M0 + 7:
  2686. return Ymr
  2687. case REG_X0:
  2688. return Yxr0
  2689. case REG_X0 + 1,
  2690. REG_X0 + 2,
  2691. REG_X0 + 3,
  2692. REG_X0 + 4,
  2693. REG_X0 + 5,
  2694. REG_X0 + 6,
  2695. REG_X0 + 7,
  2696. REG_X0 + 8,
  2697. REG_X0 + 9,
  2698. REG_X0 + 10,
  2699. REG_X0 + 11,
  2700. REG_X0 + 12,
  2701. REG_X0 + 13,
  2702. REG_X0 + 14,
  2703. REG_X0 + 15:
  2704. return Yxr
  2705. case REG_X0 + 16,
  2706. REG_X0 + 17,
  2707. REG_X0 + 18,
  2708. REG_X0 + 19,
  2709. REG_X0 + 20,
  2710. REG_X0 + 21,
  2711. REG_X0 + 22,
  2712. REG_X0 + 23,
  2713. REG_X0 + 24,
  2714. REG_X0 + 25,
  2715. REG_X0 + 26,
  2716. REG_X0 + 27,
  2717. REG_X0 + 28,
  2718. REG_X0 + 29,
  2719. REG_X0 + 30,
  2720. REG_X0 + 31:
  2721. return YxrEvex
  2722. case REG_Y0 + 0,
  2723. REG_Y0 + 1,
  2724. REG_Y0 + 2,
  2725. REG_Y0 + 3,
  2726. REG_Y0 + 4,
  2727. REG_Y0 + 5,
  2728. REG_Y0 + 6,
  2729. REG_Y0 + 7,
  2730. REG_Y0 + 8,
  2731. REG_Y0 + 9,
  2732. REG_Y0 + 10,
  2733. REG_Y0 + 11,
  2734. REG_Y0 + 12,
  2735. REG_Y0 + 13,
  2736. REG_Y0 + 14,
  2737. REG_Y0 + 15:
  2738. return Yyr
  2739. case REG_Y0 + 16,
  2740. REG_Y0 + 17,
  2741. REG_Y0 + 18,
  2742. REG_Y0 + 19,
  2743. REG_Y0 + 20,
  2744. REG_Y0 + 21,
  2745. REG_Y0 + 22,
  2746. REG_Y0 + 23,
  2747. REG_Y0 + 24,
  2748. REG_Y0 + 25,
  2749. REG_Y0 + 26,
  2750. REG_Y0 + 27,
  2751. REG_Y0 + 28,
  2752. REG_Y0 + 29,
  2753. REG_Y0 + 30,
  2754. REG_Y0 + 31:
  2755. return YyrEvex
  2756. case REG_Z0 + 0,
  2757. REG_Z0 + 1,
  2758. REG_Z0 + 2,
  2759. REG_Z0 + 3,
  2760. REG_Z0 + 4,
  2761. REG_Z0 + 5,
  2762. REG_Z0 + 6,
  2763. REG_Z0 + 7:
  2764. return Yzr
  2765. case REG_Z0 + 8,
  2766. REG_Z0 + 9,
  2767. REG_Z0 + 10,
  2768. REG_Z0 + 11,
  2769. REG_Z0 + 12,
  2770. REG_Z0 + 13,
  2771. REG_Z0 + 14,
  2772. REG_Z0 + 15,
  2773. REG_Z0 + 16,
  2774. REG_Z0 + 17,
  2775. REG_Z0 + 18,
  2776. REG_Z0 + 19,
  2777. REG_Z0 + 20,
  2778. REG_Z0 + 21,
  2779. REG_Z0 + 22,
  2780. REG_Z0 + 23,
  2781. REG_Z0 + 24,
  2782. REG_Z0 + 25,
  2783. REG_Z0 + 26,
  2784. REG_Z0 + 27,
  2785. REG_Z0 + 28,
  2786. REG_Z0 + 29,
  2787. REG_Z0 + 30,
  2788. REG_Z0 + 31:
  2789. if ctxt.Arch.Family == sys.I386 {
  2790. return Yxxx
  2791. }
  2792. return Yzr
  2793. case REG_K0:
  2794. return Yk0
  2795. case REG_K0 + 1,
  2796. REG_K0 + 2,
  2797. REG_K0 + 3,
  2798. REG_K0 + 4,
  2799. REG_K0 + 5,
  2800. REG_K0 + 6,
  2801. REG_K0 + 7:
  2802. return Yknot0
  2803. case REG_CS:
  2804. return Ycs
  2805. case REG_SS:
  2806. return Yss
  2807. case REG_DS:
  2808. return Yds
  2809. case REG_ES:
  2810. return Yes
  2811. case REG_FS:
  2812. return Yfs
  2813. case REG_GS:
  2814. return Ygs
  2815. case REG_TLS:
  2816. return Ytls
  2817. case REG_GDTR:
  2818. return Ygdtr
  2819. case REG_IDTR:
  2820. return Yidtr
  2821. case REG_LDTR:
  2822. return Yldtr
  2823. case REG_MSW:
  2824. return Ymsw
  2825. case REG_TASK:
  2826. return Ytask
  2827. case REG_CR + 0:
  2828. return Ycr0
  2829. case REG_CR + 1:
  2830. return Ycr1
  2831. case REG_CR + 2:
  2832. return Ycr2
  2833. case REG_CR + 3:
  2834. return Ycr3
  2835. case REG_CR + 4:
  2836. return Ycr4
  2837. case REG_CR + 5:
  2838. return Ycr5
  2839. case REG_CR + 6:
  2840. return Ycr6
  2841. case REG_CR + 7:
  2842. return Ycr7
  2843. case REG_CR + 8:
  2844. return Ycr8
  2845. case REG_DR + 0:
  2846. return Ydr0
  2847. case REG_DR + 1:
  2848. return Ydr1
  2849. case REG_DR + 2:
  2850. return Ydr2
  2851. case REG_DR + 3:
  2852. return Ydr3
  2853. case REG_DR + 4:
  2854. return Ydr4
  2855. case REG_DR + 5:
  2856. return Ydr5
  2857. case REG_DR + 6:
  2858. return Ydr6
  2859. case REG_DR + 7:
  2860. return Ydr7
  2861. case REG_TR + 0:
  2862. return Ytr0
  2863. case REG_TR + 1:
  2864. return Ytr1
  2865. case REG_TR + 2:
  2866. return Ytr2
  2867. case REG_TR + 3:
  2868. return Ytr3
  2869. case REG_TR + 4:
  2870. return Ytr4
  2871. case REG_TR + 5:
  2872. return Ytr5
  2873. case REG_TR + 6:
  2874. return Ytr6
  2875. case REG_TR + 7:
  2876. return Ytr7
  2877. }
  2878. return Yxxx
  2879. }
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
type AsmBuf struct {
	buf      [100]byte // encoded bytes of the instruction being assembled
	off      int       // number of bytes written to buf so far; the Put* methods append here
	rexflag  int       // NOTE(review): presumably accumulated REX prefix bits; confirm against asmins
	vexflag  bool      // Per inst: true for VEX-encoded
	evexflag bool      // Per inst: true for EVEX-encoded
	rep      bool      // NOTE(review): presumably set when a REP prefix is pending; confirm
	repn     bool      // NOTE(review): presumably set when a REPN prefix is pending; confirm
	lock     bool      // NOTE(review): presumably set when a LOCK prefix is pending; confirm
	evex     evexBits  // Initialized when evexflag is true
}
  2893. // Put1 appends one byte to the end of the buffer.
  2894. func (ab *AsmBuf) Put1(x byte) {
  2895. ab.buf[ab.off] = x
  2896. ab.off++
  2897. }
  2898. // Put2 appends two bytes to the end of the buffer.
  2899. func (ab *AsmBuf) Put2(x, y byte) {
  2900. ab.buf[ab.off+0] = x
  2901. ab.buf[ab.off+1] = y
  2902. ab.off += 2
  2903. }
  2904. // Put3 appends three bytes to the end of the buffer.
  2905. func (ab *AsmBuf) Put3(x, y, z byte) {
  2906. ab.buf[ab.off+0] = x
  2907. ab.buf[ab.off+1] = y
  2908. ab.buf[ab.off+2] = z
  2909. ab.off += 3
  2910. }
  2911. // Put4 appends four bytes to the end of the buffer.
  2912. func (ab *AsmBuf) Put4(x, y, z, w byte) {
  2913. ab.buf[ab.off+0] = x
  2914. ab.buf[ab.off+1] = y
  2915. ab.buf[ab.off+2] = z
  2916. ab.buf[ab.off+3] = w
  2917. ab.off += 4
  2918. }
  2919. // PutInt16 writes v into the buffer using little-endian encoding.
  2920. func (ab *AsmBuf) PutInt16(v int16) {
  2921. ab.buf[ab.off+0] = byte(v)
  2922. ab.buf[ab.off+1] = byte(v >> 8)
  2923. ab.off += 2
  2924. }
  2925. // PutInt32 writes v into the buffer using little-endian encoding.
  2926. func (ab *AsmBuf) PutInt32(v int32) {
  2927. ab.buf[ab.off+0] = byte(v)
  2928. ab.buf[ab.off+1] = byte(v >> 8)
  2929. ab.buf[ab.off+2] = byte(v >> 16)
  2930. ab.buf[ab.off+3] = byte(v >> 24)
  2931. ab.off += 4
  2932. }
  2933. // PutInt64 writes v into the buffer using little-endian encoding.
  2934. func (ab *AsmBuf) PutInt64(v int64) {
  2935. ab.buf[ab.off+0] = byte(v)
  2936. ab.buf[ab.off+1] = byte(v >> 8)
  2937. ab.buf[ab.off+2] = byte(v >> 16)
  2938. ab.buf[ab.off+3] = byte(v >> 24)
  2939. ab.buf[ab.off+4] = byte(v >> 32)
  2940. ab.buf[ab.off+5] = byte(v >> 40)
  2941. ab.buf[ab.off+6] = byte(v >> 48)
  2942. ab.buf[ab.off+7] = byte(v >> 56)
  2943. ab.off += 8
  2944. }
  2945. // Put copies b into the buffer.
  2946. func (ab *AsmBuf) Put(b []byte) {
  2947. copy(ab.buf[ab.off:], b)
  2948. ab.off += len(b)
  2949. }
  2950. // PutOpBytesLit writes zero terminated sequence of bytes from op,
  2951. // starting at specified offset (e.g. z counter value).
  2952. // Trailing 0 is not written.
  2953. //
  2954. // Intended to be used for literal Z cases.
  2955. // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  2956. func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  2957. for int(op[offset]) != 0 {
  2958. ab.Put1(byte(op[offset]))
  2959. offset++
  2960. }
  2961. }
  2962. // Insert inserts b at offset i.
  2963. func (ab *AsmBuf) Insert(i int, b byte) {
  2964. ab.off++
  2965. copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  2966. ab.buf[i] = b
  2967. }
  2968. // Last returns the byte at the end of the buffer.
  2969. func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  2970. // Len returns the length of the buffer.
  2971. func (ab *AsmBuf) Len() int { return ab.off }
  2972. // Bytes returns the contents of the buffer.
  2973. func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  2974. // Reset empties the buffer.
  2975. func (ab *AsmBuf) Reset() { ab.off = 0 }
  2976. // At returns the byte at offset i.
  2977. func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
// asmidx emits SIB byte.
//
// The byte is built up in i:
//   - reg[index] shifted left by 3, or 4<<3 when index is REG_NONE;
//   - the scale selector (0, 1, 2 or 3 for scale 1, 2, 4 or 8) shifted
//     left by 6;
//   - reg[base] in the low bits, or 5 when base is REG_NONE.
//
// When index is REG_NONE the scale is neither checked nor encoded.
// An index, scale or base that matches none of the cases below is reported
// with ctxt.Diag and a single zero byte is emitted instead.
func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
	var i int

	// X/Y index register is used in VSIB.
	switch index {
	default:
		goto bad

	case REG_NONE:
		// No index register: skip the scale switch entirely.
		i = 4 << 3
		goto bas

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15,
		REG_X8,
		REG_X9,
		REG_X10,
		REG_X11,
		REG_X12,
		REG_X13,
		REG_X14,
		REG_X15,
		REG_X16,
		REG_X17,
		REG_X18,
		REG_X19,
		REG_X20,
		REG_X21,
		REG_X22,
		REG_X23,
		REG_X24,
		REG_X25,
		REG_X26,
		REG_X27,
		REG_X28,
		REG_X29,
		REG_X30,
		REG_X31,
		REG_Y8,
		REG_Y9,
		REG_Y10,
		REG_Y11,
		REG_Y12,
		REG_Y13,
		REG_Y14,
		REG_Y15,
		REG_Y16,
		REG_Y17,
		REG_Y18,
		REG_Y19,
		REG_Y20,
		REG_Y21,
		REG_Y22,
		REG_Y23,
		REG_Y24,
		REG_Y25,
		REG_Y26,
		REG_Y27,
		REG_Y28,
		REG_Y29,
		REG_Y30,
		REG_Y31,
		REG_Z8,
		REG_Z9,
		REG_Z10,
		REG_Z11,
		REG_Z12,
		REG_Z13,
		REG_Z14,
		REG_Z15,
		REG_Z16,
		REG_Z17,
		REG_Z18,
		REG_Z19,
		REG_Z20,
		REG_Z21,
		REG_Z22,
		REG_Z23,
		REG_Z24,
		REG_Z25,
		REG_Z26,
		REG_Z27,
		REG_Z28,
		REG_Z29,
		REG_Z30,
		REG_Z31:
		// These index registers are rejected when assembling for 386;
		// otherwise they are handled exactly like the registers below.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_BP,
		REG_SI,
		REG_DI,
		REG_X0,
		REG_X1,
		REG_X2,
		REG_X3,
		REG_X4,
		REG_X5,
		REG_X6,
		REG_X7,
		REG_Y0,
		REG_Y1,
		REG_Y2,
		REG_Y3,
		REG_Y4,
		REG_Y5,
		REG_Y6,
		REG_Y7,
		REG_Z0,
		REG_Z1,
		REG_Z2,
		REG_Z3,
		REG_Z4,
		REG_Z5,
		REG_Z6,
		REG_Z7:
		i = reg[index] << 3
	}

	switch scale {
	default:
		goto bad

	case 1:
		// Scale 1 leaves the scale bits at zero.
		break

	case 2:
		i |= 1 << 6

	case 4:
		i |= 2 << 6

	case 8:
		i |= 3 << 6
	}

bas:
	switch base {
	default:
		goto bad

	case REG_NONE: // must be mod=00
		i |= 5

	case REG_R8,
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		// R8-R15 are rejected as base when assembling for 386.
		if ctxt.Arch.Family == sys.I386 {
			goto bad
		}
		fallthrough

	case REG_AX,
		REG_CX,
		REG_DX,
		REG_BX,
		REG_SP,
		REG_BP,
		REG_SI,
		REG_DI:
		i |= reg[base]
	}

	ab.Put1(byte(i))
	return

bad:
	// Report the problem but still emit one byte so the output keeps
	// the same length as a well-formed encoding would have.
	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
	ab.Put1(0)
}
  3151. func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3152. var rel obj.Reloc
  3153. v := vaddr(ctxt, p, a, &rel)
  3154. if rel.Siz != 0 {
  3155. if rel.Siz != 4 {
  3156. ctxt.Diag("bad reloc")
  3157. }
  3158. r := obj.Addrel(cursym)
  3159. *r = rel
  3160. r.Off = int32(p.Pc + int64(ab.Len()))
  3161. }
  3162. ab.PutInt32(int32(v))
  3163. }
  3164. func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3165. if r != nil {
  3166. *r = obj.Reloc{}
  3167. }
  3168. switch a.Name {
  3169. case obj.NAME_STATIC,
  3170. obj.NAME_GOTREF,
  3171. obj.NAME_EXTERN:
  3172. s := a.Sym
  3173. if r == nil {
  3174. ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3175. log.Fatalf("reloc")
  3176. }
  3177. if a.Name == obj.NAME_GOTREF {
  3178. r.Siz = 4
  3179. r.Type = objabi.R_GOTPCREL
  3180. } else if useAbs(ctxt, s) {
  3181. r.Siz = 4
  3182. r.Type = objabi.R_ADDR
  3183. } else {
  3184. r.Siz = 4
  3185. r.Type = objabi.R_PCREL
  3186. }
  3187. r.Off = -1 // caller must fill in
  3188. r.Sym = s
  3189. r.Add = a.Offset
  3190. return 0
  3191. }
  3192. if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3193. if r == nil {
  3194. ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3195. log.Fatalf("reloc")
  3196. }
  3197. if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3198. r.Type = objabi.R_TLS_LE
  3199. r.Siz = 4
  3200. r.Off = -1 // caller must fill in
  3201. r.Add = a.Offset
  3202. }
  3203. return 0
  3204. }
  3205. return a.Offset
  3206. }
// asmandsz encodes operand a together with the 3-bit field r.
//
// It emits one byte of the form mod<<6 | r<<3 | rm, followed where needed by
// a SIB byte (via asmidx) and an 8-bit or 32-bit displacement, and ORs the
// REX bits required by the registers involved, plus rex masked to
// 0x40|Rxr, into ab.rexflag.
//
// When the displacement needs a relocation it is added to cursym at the
// position of the 32-bit displacement (p.Pc plus the current buffer length).
// Operands that cannot be encoded are reported through ctxt.Diag.
//
// The m64 parameter is not used by this function body.
func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	if a.Offset != int64(int32(a.Offset)) {
		// The rules are slightly different for 386 and AMD64,
		// mostly for historical reasons. We may unify them later,
		// but it must be discussed beforehand.
		//
		// For 64bit mode only LEAL is allowed to overflow.
		// It's how https://golang.org/cl/59630 made it.
		// crypto/sha1/sha1block_amd64.s depends on this feature.
		//
		// For 32bit mode rules are more permissive.
		// If offset fits uint32, it's permitted.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
			(ctxt.Arch.Family != sys.AMD64 &&
				int64(uint32(a.Offset)) == a.Offset &&
				ab.rexflag&Rxw == 0)
		if !overflowOK {
			ctxt.Diag("offset too large in %s", p)
		}
	}
	// v is the displacement, truncated to 32 bits.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR operands are never encodable here; the two checks
		// only add a more specific diagnostic before the generic one.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, no SIB and no displacement.
		const regFirst = REG_AL
		const regLast = REG_Z31
		if a.Reg < regFirst || regLast < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with an explicit index register: a SIB byte is
	// always emitted (rm=4).
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// This base is a new local that shadows the function-level
		// base for the rest of this block.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base: mod=0 with a 32-bit displacement.
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// BP and R13 are excluded from the displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// 8-bit displacement (mod=1) when toDisp8 accepts v and no
		// relocation is pending.
		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ab.Put1(disp8)
			return
		}

		// Otherwise a 32-bit displacement (mod=2).
		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with index REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ab.rexflag |= regrex[base]&Rxb | rex
	// No base, a segment register (REG_CS..REG_GS) or REG_TLS as base:
	// the operand is addressed by a 32-bit displacement only.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		ab.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through a SIB byte with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok {
			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ab.Put1(disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base: no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
			// Move the displacement into an R_TLS_LE relocation's
			// addend and encode a zero displacement instead.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// BP and R13 are excluded from the displacement-free form.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
			return
		}

		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

putrelv:
	// Emit the 32-bit displacement, preceded by its relocation if one
	// was requested.
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// This r is a new local that shadows the r parameter.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(ab.Len()))
	}

	ab.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
}
  3392. func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3393. ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3394. }
  3395. func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3396. ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3397. }
  3398. func bytereg(a *obj.Addr, t *uint8) {
  3399. if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3400. a.Reg += REG_AL - REG_AX
  3401. *t = 0
  3402. }
  3403. }
  3404. func unbytereg(a *obj.Addr, t *uint8) {
  3405. if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3406. a.Reg += REG_AX - REG_AL
  3407. *t = 0
  3408. }
  3409. }
// Encoding kinds used in the fifth field of ymovtab rows.
// The notes below describe how each kind is used by the rows of ymovtab;
// NOTE(review): the code that interprets them is outside this part of the
// file — confirm details there.
const (
	movLit         uint8 = iota // Like Zlit
	movRegMem                   // used by the segment-register -> Yml rows
	movMemReg                   // used by the Yml -> segment-register rows
	movRegMem2op                // as movRegMem, for rows with two leading opcode bytes
	movMemReg2op                // as movMemReg, for rows with two leading opcode bytes
	movFullPtr                  // Load full pointer, trash heap (unsupported)
	movDoubleShift              // used by the SHLx/SHRx "double shift" rows
	movTLSReg                   // used by the "load TLS base" rows
)
// ymovtab lists instruction forms that are described row by row: each row
// holds the instruction, three operand classes, one of the mov* encoding
// kinds and up to four literal bytes.
//
// NOTE(review): the movtab field names and the lookup code are outside this
// part of the file; the all-zero last row looks like an end-of-table
// sentinel — confirm before relying on it.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	   (these disabled rows have one operand class fewer than the live rows)
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3551. func isax(a *obj.Addr) bool {
  3552. switch a.Reg {
  3553. case REG_AX, REG_AL, REG_AH:
  3554. return true
  3555. }
  3556. if a.Index == REG_AX {
  3557. return true
  3558. }
  3559. return false
  3560. }
  3561. func subreg(p *obj.Prog, from int, to int) {
  3562. if false { /* debug['Q'] */
  3563. fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3564. }
  3565. if int(p.From.Reg) == from {
  3566. p.From.Reg = int16(to)
  3567. p.Ft = 0
  3568. }
  3569. if int(p.To.Reg) == from {
  3570. p.To.Reg = int16(to)
  3571. p.Tt = 0
  3572. }
  3573. if int(p.From.Index) == from {
  3574. p.From.Index = int16(to)
  3575. p.Ft = 0
  3576. }
  3577. if int(p.To.Index) == from {
  3578. p.To.Index = int16(to)
  3579. p.Tt = 0
  3580. }
  3581. if false { /* debug['Q'] */
  3582. fmt.Printf("%v\n", p)
  3583. }
  3584. }
  3585. func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3586. switch op {
  3587. case Pm, Pe, Pf2, Pf3:
  3588. if osize != 1 {
  3589. if op != Pm {
  3590. ab.Put1(byte(op))
  3591. }
  3592. ab.Put1(Pm)
  3593. z++
  3594. op = int(o.op[z])
  3595. break
  3596. }
  3597. fallthrough
  3598. default:
  3599. if ab.Len() == 0 || ab.Last() != Pm {
  3600. ab.Put1(Pm)
  3601. }
  3602. }
  3603. ab.Put1(byte(op))
  3604. return z
  3605. }
// bpduff1 holds the machine code for the two instructions named in the
// comments below.
// NOTE(review): presumably emitted before a Duff's-device call to set up BP;
// the use site is outside this part of the file — confirm.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}
// bpduff2 holds the machine code for the instruction named in the comment
// below.
// NOTE(review): presumably emitted after a Duff's-device call to restore BP;
// the use site is outside this part of the file — confirm.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
// asmevex emits EVEX prefix and opcode byte.
// In addition to asmvex r/m, vvvv and reg fields also requires optional
// K-masking register.
//
// Expects asmbuf.evex to be properly initialized.
//
// Five bytes are written: the escape byte 0x62, the three payload bytes
// P0, P1 and P2 assembled below, and evex.opcode. Unsupported or illegal
// suffix combinations are reported through ctxt.Diag; the bytes are emitted
// regardless.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, R', X and B bits default to 1 and are cleared when the
	// corresponding register extension bit is set.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X carries the RxrEvex bit of
		// rm.Reg; otherwise it carries the Rxx bit of rm.Index.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits inverted.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE all set the b bit; the first matching
	// case wins. Rounding additionally replaces the L'L field.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' defaults to 1 and is cleared by the RxrEvex bit of the index
	// register if set, else by that of the vvvv register.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  3715. // Emit VEX prefix and opcode byte.
  3716. // The three addresses are the r/m, vvvv, and reg fields.
  3717. // The reg and rm arguments appear in the same order as the
  3718. // arguments to asmand, which typically follows the call to asmvex.
  3719. // The final two arguments are the VEX prefix (see encoding above)
  3720. // and the opcode byte.
  3721. // For details about vex prefix see:
  3722. // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  3723. func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  3724. ab.vexflag = true
  3725. rexR := 0
  3726. if r != nil {
  3727. rexR = regrex[r.Reg] & Rxr
  3728. }
  3729. rexB := 0
  3730. rexX := 0
  3731. if rm != nil {
  3732. rexB = regrex[rm.Reg] & Rxb
  3733. rexX = regrex[rm.Index] & Rxx
  3734. }
  3735. vexM := (vex >> 3) & 0x7
  3736. vexWLP := vex & 0x87
  3737. vexV := byte(0)
  3738. if v != nil {
  3739. vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  3740. }
  3741. vexV ^= 0xF
  3742. if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  3743. // Can use 2-byte encoding.
  3744. ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  3745. } else {
  3746. // Must use 3-byte encoding.
  3747. ab.Put3(0xc4,
  3748. (byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  3749. vexV<<3|vexWLP,
  3750. )
  3751. }
  3752. ab.Put1(opcode)
  3753. }
  3754. // regIndex returns register index that fits in 5 bits.
  3755. //
  3756. // R : 3 bit | legacy instructions | N/A
  3757. // [R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  3758. // EVEX.R : 1 bit | EVEX extension bit | RxrEvex
  3759. //
  3760. // Examples:
  3761. // REG_Z30 => 30
  3762. // REG_X15 => 15
  3763. // REG_R9 => 9
  3764. // REG_AX => 0
  3765. //
  3766. func regIndex(r int16) int {
  3767. lower3bits := reg[r]
  3768. high4bit := regrex[r] & Rxr << 1
  3769. high5bit := regrex[r] & RxrEvex << 0
  3770. return lower3bits | high4bit | high5bit
  3771. }
  3772. // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  3773. // Reports errors via ctxt.
  3774. func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3775. // If any pair of the index, mask, or destination registers
  3776. // are the same, illegal instruction trap (#UD) is triggered.
  3777. index := regIndex(p.GetFrom3().Index)
  3778. mask := regIndex(p.From.Reg)
  3779. dest := regIndex(p.To.Reg)
  3780. if dest == mask || dest == index || mask == index {
  3781. ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  3782. return false
  3783. }
  3784. return true
  3785. }
  3786. // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  3787. // Reports errors via ctxt.
  3788. func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  3789. // Illegal instruction trap (#UD) is triggered if the destination vector
  3790. // register is the same as index vector in VSIB.
  3791. index := regIndex(p.From.Index)
  3792. dest := regIndex(p.To.Reg)
  3793. if dest == index {
  3794. ctxt.Diag("index and destination registers should be distinct: %v", p)
  3795. return false
  3796. }
  3797. return true
  3798. }
// doasm emits the encoding of the single instruction p into ab.
//
// The steps, in order:
//
//  1. Look up the opcode table entry for p.As in opindex; a missing entry
//     is reported through ctxt.Diag.
//  2. Emit any prefix byte that prefixof reports for p.From and p.To.
//  3. For the gather instructions listed in the switch below, reject
//     register combinations via avx2gatherValid / avx512gatherValid.
//  4. Walk o.ytab looking for an entry whose operand pattern matches the
//     argument classes of p. While walking, z is advanced so that it
//     indexes the opcode bytes in o.op belonging to the current entry.
//     On a match, bytes are emitted according to o.prefix and yt.zcase
//     and the function returns.
//  5. If no ytab entry produced output, try the ymovtab special cases.
//  6. Otherwise fall to the "bad" label: on non-AMD64 targets an attempt
//     is made to reissue the instruction with registers exchanged;
//     failing that, "invalid instruction" is reported.
//
// Relocations are appended to cursym via obj.Addrel. Errors are reported
// through ctxt.Diag; a few unrecoverable conditions call log.Fatalf.
func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
	o := opindex[p.As&obj.AMask]
	if o == nil {
		ctxt.Diag("asmins: missing op %v", p)
		return
	}
	// Emit the prefix byte (if any) that prefixof reports for each operand.
	if pre := prefixof(ctxt, &p.From); pre != 0 {
		ab.Put1(byte(pre))
	}
	if pre := prefixof(ctxt, &p.To); pre != 0 {
		ab.Put1(byte(pre))
	}
	// Checks to warn about instruction/arguments combinations that
	// will unconditionally trigger illegal instruction trap (#UD).
	switch p.As {
	case AVGATHERDPD,
		AVGATHERQPD,
		AVGATHERDPS,
		AVGATHERQPS,
		AVPGATHERDD,
		AVPGATHERQD,
		AVPGATHERDQ,
		AVPGATHERQQ:
		// AVX512 gather requires explicit K mask.
		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
			if !avx512gatherValid(ctxt, p) {
				return
			}
		} else {
			if !avx2gatherValid(ctxt, p) {
				return
			}
		}
	}
	// Compute the operand classes of From and To if they are still zero,
	// and store them back into p.
	if p.Ft == 0 {
		p.Ft = uint8(oclass(ctxt, p, &p.From))
	}
	if p.Tt == 0 {
		p.Tt = uint8(oclass(ctxt, p, &p.To))
	}
	// Classes are kept pre-multiplied by Ymax; the scaled values are
	// later added to a pattern class to index ycover.
	ft := int(p.Ft) * Ymax
	var f3t int
	tt := int(p.Tt) * Ymax
	// xo is obj.Bool2int of "the first opcode byte is 0x0f".
	xo := obj.Bool2int(o.op[0] == 0x0f)
	// z is the current index into o.op.
	z := 0
	var a *obj.Addr
	var l int
	var op int
	var q *obj.Prog
	var r *obj.Reloc
	var rel obj.Reloc
	var v int64
	// Build the list of (scaled) argument classes in the order
	// From, RestArgs..., To, leaving out From/To when they are Ynone.
	args := make([]int, 0, argListMax)
	if ft != Ynone*Ymax {
		args = append(args, ft)
	}
	for i := range p.RestArgs {
		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
	}
	if tt != Ynone*Ymax {
		args = append(args, tt)
	}
	for _, yt := range o.ytab {
		// ytab matching is purely args-based,
		// but AVX512 suffixes like "Z" or "RU_SAE" will
		// add EVEX-only filter that will reject non-EVEX matches.
		//
		// Consider "VADDPD.BCST 2032(DX), X0, X0".
		// Without this rule, operands will lead to VEX-encoded form
		// and produce "c5b15813" encoding.
		if !yt.match(args) {
			// "xo" is always zero for VEX/EVEX encoded insts.
			z += int(yt.zoffset) + xo
		} else {
			if p.Scond != 0 && !evexZcase(yt.zcase) {
				// Do not signal error and continue to search
				// for matching EVEX-encoded form.
				z += int(yt.zoffset)
				continue
			}
			// Emit prefix/escape bytes and set REX flags as dictated
			// by the opcode table entry's prefix kind.
			switch o.prefix {
			case Px1: // first option valid only in 32-bit mode
				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
					z += int(yt.zoffset) + xo
					continue
				}
			case Pq: // 16 bit escape and opcode escape
				ab.Put2(Pe, Pm)
			case Pq3: // 16 bit escape and opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pe, Pm)
			case Pq4: // 66 0F 38
				ab.Put3(0x66, 0x0F, 0x38)
			case Pq4w: // 66 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0x66, 0x0F, 0x38)
			case Pq5: // F3 0F 38
				ab.Put3(0xF3, 0x0F, 0x38)
			case Pq5w: // F3 0F 38 + REX.W
				ab.rexflag |= Pw
				ab.Put3(0xF3, 0x0F, 0x38)
			case Pf2, // xmm opcode escape
				Pf3:
				ab.Put2(o.prefix, Pm)
			case Pef3:
				ab.Put3(Pe, Pf3, Pm)
			case Pfw: // xmm opcode escape + REX.W
				ab.rexflag |= Pw
				ab.Put2(Pf3, Pm)
			case Pm: // opcode escape
				ab.Put1(Pm)
			case Pe: // 16 bit escape
				ab.Put1(Pe)
			case Pw: // 64-bit escape
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal 64: %v", p)
				}
				ab.rexflag |= Pw
			case Pw8: // 64-bit escape if z >= 8
				if z >= 8 {
					if ctxt.Arch.Family != sys.AMD64 {
						ctxt.Diag("asmins: illegal 64: %v", p)
					}
					ab.rexflag |= Pw
				}
			case Pb: // botch
				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
					goto bad
				}
				// NOTE(rsc): This is probably safe to do always,
				// but when enabled it chooses different encodings
				// than the old cmd/internal/obj/i386 code did,
				// which breaks our "same bits out" checks.
				// In particular, CMPB AX, $0 encodes as 80 f8 00
				// in the original obj/i386, and it would encode
				// (using a valid, shorter form) as 3c 00 if we enabled
				// the call to bytereg here.
				if ctxt.Arch.Family == sys.AMD64 {
					bytereg(&p.From, &p.Ft)
					bytereg(&p.To, &p.Tt)
				}
			case P32: // 32 bit but illegal if 64-bit mode
				if ctxt.Arch.Family == sys.AMD64 {
					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
				}
			case Py: // 64-bit only, no prefix
				if ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			case Py1: // 64-bit only if z < 1, no prefix
				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			case Py3: // 64-bit only if z < 3, no prefix
				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
				}
			}
			// z must point inside the opcode byte table at this point.
			if z >= len(o.op) {
				log.Fatalf("asmins bad table %v", p)
			}
			op = int(o.op[z])
			// A leading 0x0f is emitted right away and the following
			// byte becomes the opcode.
			if op == 0x0f {
				ab.Put1(byte(op))
				z++
				op = int(o.op[z])
			}
			// Emit the instruction body according to the matched
			// entry's zcase.
			switch yt.zcase {
			default:
				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
				return
			case Zpseudo:
				break
			case Zlit:
				ab.PutOpBytesLit(z, &o.op)
			case Zlitr_m:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zlitm_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zlit_m_r:
				ab.PutOpBytesLit(z, &o.op)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
			case Zmb_r:
				bytereg(&p.From, &p.Ft)
				fallthrough
			case Zm_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Z_m_r:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
			case Zm2_r:
				ab.Put2(byte(op), o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zm_r_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zm_r_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zm_r_i_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
				ab.Put1(byte(p.To.Offset))
			case Zibm_r, Zibr_m:
				ab.PutOpBytesLit(z, &o.op)
				if yt.zcase == Zibr_m {
					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				} else {
					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				}
				// The immediate is 1 byte by default, 2 bytes for a Yi32
				// pattern with the Pe prefix, and 4 bytes for other Yi32.
				switch {
				default:
					ab.Put1(byte(p.From.Offset))
				case yt.args[0] == Yi32 && o.prefix == Pe:
					ab.PutInt16(int16(p.From.Offset))
				case yt.args[0] == Yi32:
					ab.PutInt32(int32(p.From.Offset))
				}
			case Zaut_r:
				ab.Put1(0x8d) // leal
				if p.From.Type != obj.TYPE_ADDR {
					ctxt.Diag("asmins: Zaut sb type ADDR")
				}
				// Temporarily treat From as a memory operand for asmand,
				// then restore its type.
				p.From.Type = obj.TYPE_MEM
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				p.From.Type = obj.TYPE_ADDR
			case Zm_o:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
			case Zr_m:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zvex:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
			case Zvex_rm_v_r:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zvex_rm_v_ro:
				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
			case Zvex_i_rm_vo:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
				ab.Put1(byte(p.From.Offset))
			case Zvex_i_r_v:
				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
				// regnum is the low 3 bits of the register's offset from
				// REG_X0 (for X0..X15) or from REG_Y0 (otherwise).
				regnum := byte(0x7)
				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
					regnum &= byte(p.GetFrom3().Reg - REG_X0)
				} else {
					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
				}
				ab.Put1(o.op[z+2] | regnum)
				ab.Put1(byte(p.From.Offset))
			case Zvex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))
			case Zvex_i_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))
			case Zvex_v_rm_r:
				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
			case Zvex_r_v_rm:
				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zvex_rm_r_vo:
				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
			case Zvex_i_r_rm:
				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))
			case Zvex_hr_rm_v_r:
				hr, from, from3, to := unpackOps4(p)
				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
				ab.asmand(ctxt, cursym, p, from, to)
				// The index of the hr register goes in the upper four
				// bits of the trailing byte.
				ab.Put1(byte(regIndex(hr.Reg) << 4))
			case Zevex_k_rmo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
			case Zevex_i_rm_vo:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
				ab.Put1(byte(p.From.Offset))
			case Zevex_i_rm_k_vo:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, to, nil, kmask)
				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
				ab.Put1(byte(imm.Offset))
			case Zevex_i_r_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
				ab.Put1(byte(p.From.Offset))
			case Zevex_i_r_k_rm:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, nil, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
				ab.Put1(byte(imm.Offset))
			case Zevex_i_rm_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
				ab.Put1(byte(p.From.Offset))
			case Zevex_i_rm_k_r:
				imm, from, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, nil, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))
			case Zevex_i_rm_v_r:
				imm, from, from3, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, nil)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))
			case Zevex_i_rm_v_k_r:
				imm, from, from3, kmask, to := unpackOps5(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
				ab.Put1(byte(imm.Offset))
			case Zevex_r_v_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zevex_rm_v_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zevex_rm_k_r:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
			case Zevex_r_k_rm:
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zevex_rm_v_k_r:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, from, from3, to, kmask)
				ab.asmand(ctxt, cursym, p, from, to)
			case Zevex_r_v_k_rm:
				from, from3, kmask, to := unpackOps4(p)
				ab.evex = newEVEXBits(z, &o.op)
				ab.asmevex(ctxt, p, to, from3, from, kmask)
				ab.asmand(ctxt, cursym, p, to, from)
			case Zr_m_xm:
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zr_m_xm_nr:
				ab.rexflag = 0
				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
			case Zo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
			case Zcallindreg:
				// Record a zero-sized R_CALLIND relocation at the
				// instruction's PC, then encode like Zo_m64.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc)
				r.Type = objabi.R_CALLIND
				r.Siz = 0
				fallthrough
			case Zo_m64:
				ab.Put1(byte(op))
				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
			case Zm_ibo:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
			case Zibo_m:
				ab.Put1(byte(op))
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
			case Zibo_m_xm:
				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
			case Z_ib, Zib_:
				// The immediate byte comes from From for Zib_ and
				// from To for Z_ib.
				if yt.zcase == Zib_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				if p.As == AXABORT {
					ab.Put1(o.op[z+1])
				}
				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
			case Zib_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
			case Zil_rp:
				ab.rexflag |= regrex[p.To.Reg] & Rxb
				ab.Put1(byte(op + reg[p.To.Reg]))
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}
			case Zo_iw:
				ab.Put1(byte(op))
				if p.From.Type != obj.TYPE_NONE {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				}
			case Ziq_rp:
				v = vaddr(ctxt, p, &p.From, &rel)
				// l holds the upper 32 bits of the value; it selects
				// between a 32-bit immediate form, a sign-extended
				// 32-bit form and the full 64-bit immediate form.
				l = int(v >> 32)
				if l == 0 && rel.Siz != 8 {
					ab.rexflag &^= (0x40 | Rxw)
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}
					ab.PutInt32(int32(v))
				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
					ab.Put1(0xc7)
					ab.asmando(ctxt, cursym, p, &p.To, 0)
					ab.PutInt32(int32(v)) // need all 8
				} else {
					ab.rexflag |= regrex[p.To.Reg] & Rxb
					ab.Put1(byte(op + reg[p.To.Reg]))
					if rel.Type != 0 {
						r = obj.Addrel(cursym)
						*r = rel
						r.Off = int32(p.Pc + int64(ab.Len()))
					}
					ab.PutInt64(v)
				}
			case Zib_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
			case Z_il, Zil_:
				if yt.zcase == Zil_ {
					a = &p.From
				} else {
					a = &p.To
				}
				ab.Put1(byte(op))
				// With the Pe prefix the immediate is 16 bits wide;
				// otherwise relput4 emits it.
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}
			case Zm_ilo, Zilo_m:
				ab.Put1(byte(op))
				if yt.zcase == Zilo_m {
					a = &p.From
					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
				} else {
					a = &p.To
					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
				}
				if o.prefix == Pe {
					v = vaddr(ctxt, p, a, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, a)
				}
			case Zil_rr:
				ab.Put1(byte(op))
				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
				if o.prefix == Pe {
					v = vaddr(ctxt, p, &p.From, nil)
					ab.PutInt16(int16(v))
				} else {
					ab.relput4(ctxt, cursym, p, &p.From)
				}
			case Z_rp:
				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.To.Reg]))
			case Zrp_:
				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
				ab.Put1(byte(op + reg[p.From.Reg]))
			case Zcallcon, Zjmpcon:
				if yt.zcase == Zcallcon {
					ab.Put1(byte(op))
				} else {
					ab.Put1(o.op[z+1])
				}
				// 4-byte PC-relative relocation; a zero placeholder is
				// written in its place.
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Type = objabi.R_PCREL
				r.Siz = 4
				r.Add = p.To.Offset
				ab.PutInt32(0)
			case Zcallind:
				ab.Put2(byte(op), o.op[z+1])
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				if ctxt.Arch.Family == sys.AMD64 {
					r.Type = objabi.R_PCREL
				} else {
					r.Type = objabi.R_ADDR
				}
				r.Siz = 4
				r.Add = p.To.Offset
				r.Sym = p.To.Sym
				ab.PutInt32(0)
			case Zcall, Zcallduff:
				if p.To.Sym == nil {
					ctxt.Diag("call without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}
				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
					ctxt.Diag("directly calling duff when dynamically linking Go")
				}
				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Maintain BP around call, since duffcopy/duffzero can't do it
					// (the call jumps into the middle of the function).
					// This makes it possible to see call sites for duffcopy/duffzero in
					// BP-based profiling tools like Linux perf (which is the
					// whole point of maintaining frame pointers in Go).
					// MOVQ BP, -16(SP)
					// LEAQ -16(SP), BP
					ab.Put(bpduff1)
				}
				ab.Put1(byte(op))
				r = obj.Addrel(cursym)
				r.Off = int32(p.Pc + int64(ab.Len()))
				r.Sym = p.To.Sym
				r.Add = p.To.Offset
				r.Type = objabi.R_CALL
				r.Siz = 4
				ab.PutInt32(0)
				if yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
					// Pop BP pushed above.
					// MOVQ 0(BP), BP
					ab.Put(bpduff2)
				}
			// TODO: jump across functions needs reloc
			case Zbr, Zjmp, Zloop:
				if p.As == AXBEGIN {
					ab.Put1(byte(op))
				}
				// Branch to a symbol: only allowed for Zjmp, encoded
				// with a 4-byte R_CALL relocation.
				if p.To.Sym != nil {
					if yt.zcase != Zjmp {
						ctxt.Diag("branch to ATEXT")
						ctxt.DiagFlush()
						log.Fatalf("bad code")
					}
					ab.Put1(o.op[z+1])
					r = obj.Addrel(cursym)
					r.Off = int32(p.Pc + int64(ab.Len()))
					r.Sym = p.To.Sym
					// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
					// it can point to a trampoline instead of the destination itself.
					r.Type = objabi.R_CALL
					r.Siz = 4
					ab.PutInt32(0)
					break
				}
				// Assumes q is in this function.
				// TODO: Check in input, preserve in brchain.
				// Fill in backward jump now.
				q = p.To.Target()
				if q == nil {
					ctxt.Diag("jmp/branch/loop without target")
					ctxt.DiagFlush()
					log.Fatalf("bad code")
				}
				if p.Back&branchBackwards != 0 {
					// Displacement relative to the end of a 2-byte
					// short branch.
					v = q.Pc - (p.Pc + 2)
					if v >= -128 && p.As != AXBEGIN {
						if p.As == AJCXZL {
							ab.Put1(0x67)
						}
						ab.Put2(byte(op), byte(v))
					} else if yt.zcase == Zloop {
						ctxt.Diag("loop too far: %v", p)
					} else {
						// Long form: adjust the displacement for the
						// longer encoding (5 bytes instead of 2, plus
						// one more each for AXBEGIN and for the 0x0f
						// byte of Zbr).
						v -= 5 - 2
						if p.As == AXBEGIN {
							v--
						}
						if yt.zcase == Zbr {
							ab.Put1(0x0f)
							v--
						}
						ab.Put1(o.op[z+1])
						ab.PutInt32(int32(v))
					}
					break
				}
				// Annotate target; will fill in later.
				p.Forwd = q.Rel
				q.Rel = p
				if p.Back&branchShort != 0 && p.As != AXBEGIN {
					if p.As == AJCXZL {
						ab.Put1(0x67)
					}
					ab.Put2(byte(op), 0)
				} else if yt.zcase == Zloop {
					ctxt.Diag("loop too far: %v", p)
				} else {
					if yt.zcase == Zbr {
						ab.Put1(0x0f)
					}
					ab.Put1(o.op[z+1])
					ab.PutInt32(0)
				}
			case Zbyte:
				v = vaddr(ctxt, p, &p.From, &rel)
				if rel.Siz != 0 {
					rel.Siz = uint8(op)
					r = obj.Addrel(cursym)
					*r = rel
					r.Off = int32(p.Pc + int64(ab.Len()))
				}
				// Emit v in little-endian order; op is used as the
				// byte count (1, 2, 4 or 8).
				ab.Put1(byte(v))
				if op > 1 {
					ab.Put1(byte(v >> 8))
					if op > 2 {
						ab.PutInt16(int16(v >> 16))
						if op > 4 {
							ab.PutInt32(int32(v >> 32))
						}
					}
				}
			}
			return
		}
	}
	// No ytab entry produced output; try the ymovtab special cases.
	// They are selected on p.As and the ycover relation for the
	// From, From3 and To classes.
	f3t = Ynone * Ymax
	if p.GetFrom3() != nil {
		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
	}
	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
		var pp obj.Prog
		var t []byte
		if p.As == mo[0].as {
			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
				t = mo[0].op[:]
				switch mo[0].code {
				default:
					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
				case movLit:
					// Emit the opcode bytes up to (not including)
					// the first zero byte.
					for z = 0; t[z] != 0; z++ {
						ab.Put1(t[z])
					}
				case movRegMem:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
				case movMemReg:
					ab.Put1(t[0])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
				case movRegMem2op: // r,m - 2op
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
				case movMemReg2op:
					ab.Put2(t[0], t[1])
					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
				case movFullPtr:
					if t[0] != 0 {
						ab.Put1(t[0])
					}
					// The opcode depends on the segment register
					// stored in p.To.Index.
					switch p.To.Index {
					default:
						goto bad
					case REG_DS:
						ab.Put1(0xc5)
					case REG_SS:
						ab.Put2(0x0f, 0xb2)
					case REG_ES:
						ab.Put1(0xc4)
					case REG_FS:
						ab.Put2(0x0f, 0xb4)
					case REG_GS:
						ab.Put2(0x0f, 0xb5)
					}
					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
				case movDoubleShift:
					// A leading Pw or Pe in the opcode table is consumed
					// here rather than emitted as an opcode byte.
					if t[0] == Pw {
						if ctxt.Arch.Family != sys.AMD64 {
							ctxt.Diag("asmins: illegal 64: %v", p)
						}
						ab.rexflag |= Pw
						t = t[1:]
					} else if t[0] == Pe {
						ab.Put1(Pe)
						t = t[1:]
					}
					// The shift count is either a constant (t[0] opcode,
					// followed by an immediate byte) or CL/CX (t[1] opcode).
					switch p.From.Type {
					default:
						goto bad
					case obj.TYPE_CONST:
						ab.Put2(0x0f, t[0])
						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						ab.Put1(byte(p.From.Offset))
					case obj.TYPE_REG:
						switch p.From.Reg {
						default:
							goto bad
						case REG_CL, REG_CX:
							ab.Put2(0x0f, t[1])
							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
						}
					}
				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
				// where you load the TLS base register into a register and then index off that
				// register to access the actual TLS variables. Systems that allow direct TLS access
				// are handled in prefixof above and should not be listed here.
				case movTLSReg:
					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
						ctxt.Diag("invalid load of TLS: %v", p)
					}
					if ctxt.Arch.Family == sys.I386 {
						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
						// where you load the TLS base register into a register and then index off that
						// register to access the actual TLS variables. Systems that allow direct TLS access
						// are handled in prefixof above and should not be listed here.
						switch ctxt.Headtype {
						default:
							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
						case objabi.Hlinux, objabi.Hfreebsd:
							if ctxt.Flag_shared {
								// Note that this is not generating the same insns as the other cases.
								// MOV TLS, dst
								// becomes
								// call __x86.get_pc_thunk.dst
								// movl (gotpc + g@gotntpoff)(dst), dst
								// which is encoded as
								// call __x86.get_pc_thunk.dst
								// movq 0(dst), dst
								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
								// is g, which we can't check here, but will when we assemble the second
								// instruction.
								dst := p.To.Reg
								ab.Put1(0xe8)
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_CALL
								r.Siz = 4
								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
								ab.PutInt32(0)
								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
								r = obj.Addrel(cursym)
								r.Off = int32(p.Pc + int64(ab.Len()))
								r.Type = objabi.R_TLS_IE
								r.Siz = 4
								r.Add = 2
								ab.PutInt32(0)
							} else {
								// ELF TLS base is 0(GS).
								pp.From = p.From
								pp.From.Type = obj.TYPE_MEM
								pp.From.Reg = REG_GS
								pp.From.Offset = 0
								pp.From.Index = REG_NONE
								pp.From.Scale = 0
								ab.Put2(0x65, // GS
									0x8B)
								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
							}
						case objabi.Hplan9:
							pp.From = obj.Addr{}
							pp.From.Type = obj.TYPE_MEM
							pp.From.Name = obj.NAME_EXTERN
							pp.From.Sym = plan9privates
							pp.From.Offset = 0
							pp.From.Index = REG_NONE
							ab.Put1(0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						case objabi.Hwindows:
							// Windows TLS base is always 0x14(FS).
							pp.From = p.From
							pp.From.Type = obj.TYPE_MEM
							pp.From.Reg = REG_FS
							pp.From.Offset = 0x14
							pp.From.Index = REG_NONE
							pp.From.Scale = 0
							ab.Put2(0x64, // FS
								0x8B)
							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
						}
						break
					}
					// Not I386: the forms below all set REX.W.
					switch ctxt.Headtype {
					default:
						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
					case objabi.Hlinux, objabi.Hfreebsd:
						if !ctxt.Flag_shared {
							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
						}
						// Note that this is not generating the same insn as the other cases.
						// MOV TLS, R_to
						// becomes
						// movq g@gottpoff(%rip), R_to
						// which is encoded as
						// movq 0(%rip), R_to
						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
						// is g, which we can't check here, but will when we assemble the second
						// instruction.
						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
						r = obj.Addrel(cursym)
						r.Off = int32(p.Pc + int64(ab.Len()))
						r.Type = objabi.R_TLS_IE
						r.Siz = 4
						r.Add = -4
						ab.PutInt32(0)
					case objabi.Hplan9:
						pp.From = obj.Addr{}
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_EXTERN
						pp.From.Sym = plan9privates
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						ab.rexflag |= Pw
						ab.Put1(0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
						// TLS base is 0(FS).
						pp.From = p.From
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_NONE
						pp.From.Offset = 0
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x64, // FS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					case objabi.Hwindows:
						// Windows TLS base is always 0x28(GS).
						pp.From = p.From
						pp.From.Type = obj.TYPE_MEM
						pp.From.Name = obj.NAME_NONE
						pp.From.Reg = REG_GS
						pp.From.Offset = 0x28
						pp.From.Index = REG_NONE
						pp.From.Scale = 0
						ab.rexflag |= Pw
						ab.Put2(0x65, // GS
							0x8B)
						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
					}
				}
				return
			}
		}
	}
	goto bad

	// bad is reached when neither table produced an encoding, or when an
	// encoding path above rejected its operands.
bad:
	if ctxt.Arch.Family != sys.AMD64 {
		// here, the assembly has failed.
		// if it's a byte instruction that has
		// unaddressable registers, try to
		// exchange registers and reissue the
		// instruction with the operands renamed.
		pp := *p
		unbytereg(&pp.From, &pp.Ft)
		unbytereg(&pp.To, &pp.Tt)
		// First try the From operand: if it is a register in the range
		// REG_BP..REG_DI, wrap the reissued instruction in a pair of
		// xchg instructions.
		z := int(p.From.Reg)
		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.To)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg lhs,bx
					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
				}
				return
			}
			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
				// We certainly don't want to exchange
				// with AX if the op is MUL or DIV.
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg lhs,bx
				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
			}
			return
		}
		// Then try the To operand in the same way.
		z = int(p.To.Reg)
		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
			// For now, different to keep bit-for-bit compatibility.
			if ctxt.Arch.Family == sys.I386 {
				breg := byteswapreg(ctxt, &p.From)
				if breg != REG_AX {
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
					subreg(&pp, z, breg)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(0x87) // xchg rhs,bx
					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
				} else {
					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
					subreg(&pp, z, REG_AX)
					ab.doasm(ctxt, cursym, &pp)
					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
				}
				return
			}
			if isax(&p.From) {
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
				subreg(&pp, z, REG_BX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(0x87) // xchg rhs,bx
				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
			} else {
				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
				subreg(&pp, z, REG_AX)
				ab.doasm(ctxt, cursym, &pp)
				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
			}
			return
		}
	}
	ctxt.Diag("invalid instruction: %v", p)
}
  4753. // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  4754. // which is not referenced in a.
  4755. // If a is empty, it returns BX to account for MULB-like instructions
  4756. // that might use DX and AX.
  4757. func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  4758. cana, canb, canc, cand := true, true, true, true
  4759. if a.Type == obj.TYPE_NONE {
  4760. cana, cand = false, false
  4761. }
  4762. if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  4763. switch a.Reg {
  4764. case REG_NONE:
  4765. cana, cand = false, false
  4766. case REG_AX, REG_AL, REG_AH:
  4767. cana = false
  4768. case REG_BX, REG_BL, REG_BH:
  4769. canb = false
  4770. case REG_CX, REG_CL, REG_CH:
  4771. canc = false
  4772. case REG_DX, REG_DL, REG_DH:
  4773. cand = false
  4774. }
  4775. }
  4776. if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  4777. switch a.Index {
  4778. case REG_AX:
  4779. cana = false
  4780. case REG_BX:
  4781. canb = false
  4782. case REG_CX:
  4783. canc = false
  4784. case REG_DX:
  4785. cand = false
  4786. }
  4787. }
  4788. switch {
  4789. case cana:
  4790. return REG_AX
  4791. case canb:
  4792. return REG_BX
  4793. case canc:
  4794. return REG_CX
  4795. case cand:
  4796. return REG_DX
  4797. default:
  4798. ctxt.Diag("impossible byte register")
  4799. ctxt.DiagFlush()
  4800. log.Fatalf("bad code")
  4801. return 0
  4802. }
  4803. }
  4804. func isbadbyte(a *obj.Addr) bool {
  4805. return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  4806. }
  4807. func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4808. ab.Reset()
  4809. ab.rexflag = 0
  4810. ab.vexflag = false
  4811. ab.evexflag = false
  4812. mark := ab.Len()
  4813. ab.doasm(ctxt, cursym, p)
  4814. if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  4815. // as befits the whole approach of the architecture,
  4816. // the rex prefix must appear before the first opcode byte
  4817. // (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  4818. // before the 0f opcode escape!), or it might be ignored.
  4819. // note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  4820. if ctxt.Arch.Family != sys.AMD64 {
  4821. ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  4822. }
  4823. n := ab.Len()
  4824. var np int
  4825. for np = mark; np < n; np++ {
  4826. c := ab.At(np)
  4827. if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  4828. break
  4829. }
  4830. }
  4831. ab.Insert(np, byte(0x40|ab.rexflag))
  4832. }
  4833. n := ab.Len()
  4834. for i := len(cursym.R) - 1; i >= 0; i-- {
  4835. r := &cursym.R[i]
  4836. if int64(r.Off) < p.Pc {
  4837. break
  4838. }
  4839. if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  4840. r.Off++
  4841. }
  4842. if r.Type == objabi.R_PCREL {
  4843. if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  4844. // PC-relative addressing is relative to the end of the instruction,
  4845. // but the relocations applied by the linker are relative to the end
  4846. // of the relocation. Because immediate instruction
  4847. // arguments can follow the PC-relative memory reference in the
  4848. // instruction encoding, the two may not coincide. In this case,
  4849. // adjust addend so that linker can keep relocating relative to the
  4850. // end of the relocation.
  4851. r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  4852. } else if ctxt.Arch.Family == sys.I386 {
  4853. // On 386 PC-relative addressing (for non-call/jmp instructions)
  4854. // assumes that the previous instruction loaded the PC of the end
  4855. // of that instruction into CX, so the adjustment is relative to
  4856. // that.
  4857. r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4858. }
  4859. }
  4860. if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  4861. // On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  4862. r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  4863. }
  4864. }
  4865. }
  4866. // unpackOps4 extracts 4 operands from p.
  4867. func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  4868. return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
  4869. }
  4870. // unpackOps5 extracts 5 operands from p.
  4871. func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  4872. return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.RestArgs[2], &p.To
  4873. }