| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
27742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663466446654666466746684669467046714672467346744675467646774678467946804681468246834684468546864687468846894690469146924693469446954696469746984699470047014702470347044705470647074708470947104711471247134714471547164717471847194720472147224723472447254726472747284729473047314732473347344735473647374738473947404741474247434744474547464747474847494750475147524753475447554756475747584759476047614762476347644765476647674768476947704771477247734774477547764
77747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765
27752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696569756985699570057015702570357045705570657075708570957105711571257135714571557165717571857195720572157225723572457255726572757285729573057315732573357345735573657375738573957405741574257435744574557465747574857495750575157525753575457555756575757585759576057615762576357645765576657675768576957705771577257735774577557765
77757785779578057815782578357845785578657875788578957905791579257935794579557965797579857995800580158025803580458055806580758085809581058115812581358145815581658175818581958205821582258235824582558265827582858295830583158325833583458355836583758385839584058415842584358445845584658475848584958505851585258535854585558565857585858595860586158625863586458655866586758685869587058715872587358745875587658775878587958805881588258835884588558865887588858895890589158925893589458955896589758985899590059015902590359045905590659075908590959105911591259135914591559165917591859195920592159225923592459255926592759285929593059315932593359345935593659375938593959405941594259435944594559465947594859495950595159525953595459555956595759585959596059615962596359645965596659675968596959705971597259735974597559765977597859795980598159825983598459855986598759885989599059915992599359945995599659975998599960006001600260036004600560066007600860096010601160126013601460156016601760186019602060216022602360246025602660276028602960306031603260336034603560366037603860396040604160426043604460456046604760486049605060516052605360546055605660576058605960606061606260636064606560666067606860696070607160726073607460756076607760786079608060816082608360846085608660876088608960906091609260936094609560966097609860996100610161026103610461056106610761086109611061116112611361146115611661176118611961206121612261236124612561266127612861296130613161326133613461356136613761386139614061416142614361446145614661476148614961506151615261536154615561566157615861596160616161626163616461656166616761686169617061716172617361746175617661776178617961806181618261836184618561866187618861896190619161926193619461956196619761986199620062016202620362046205620662076208620962106211621262136214621562166217621862196220622162226223622462256226622762286229623062316232623362346235623662376238623962406241624262436244624562466247624862496250625162526253625462556256625762586259626062616262626362646265626662676268626962706271627262736274627562766
27762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753675467556756675767586759676067616762676367646765676667676768676967706771677267736774677567766
77767786779678067816782678367846785678667876788678967906791679267936794679567966797679867996800680168026803680468056806680768086809681068116812681368146815681668176818681968206821682268236824682568266827682868296830683168326833683468356836683768386839684068416842684368446845684668476848684968506851685268536854685568566857685868596860686168626863686468656866686768686869687068716872687368746875687668776878687968806881688268836884688568866887688868896890689168926893689468956896689768986899690069016902690369046905690669076908690969106911691269136914691569166917691869196920692169226923692469256926692769286929693069316932693369346935693669376938693969406941694269436944694569466947694869496950695169526953695469556956695769586959696069616962696369646965696669676968696969706971697269736974697569766977697869796980698169826983698469856986698769886989699069916992699369946995699669976998699970007001700270037004700570067007700870097010701170127013701470157016701770187019702070217022702370247025702670277028702970307031703270337034703570367037703870397040704170427043704470457046704770487049705070517052705370547055705670577058705970607061706270637064706570667067706870697070707170727073707470757076707770787079708070817082708370847085708670877088708970907091709270937094709570967097709870997100710171027103710471057106710771087109711071117112711371147115711671177118711971207121712271237124712571267127712871297130713171327133713471357136713771387139714071417142714371447145714671477148714971507151715271537154715571567157715871597160716171627163716471657166716771687169717071717172717371747175717671777178717971807181718271837184718571867187718871897190719171927193719471957196719771987199720072017202720372047205720672077208720972107211721272137214721572167217721872197220722172227223722472257226722772287229723072317232723372347235723672377238723972407241724272437244724572467247724872497250725172527253725472557256725772587259726072617262726372647265726672677268726972707271727272737274727572767
27772787279728072817282728372847285728672877288728972907291729272937294729572967297729872997300730173027303730473057306730773087309731073117312731373147315731673177318731973207321732273237324732573267327732873297330733173327333733473357336733773387339734073417342734373447345734673477348734973507351735273537354735573567357735873597360736173627363736473657366736773687369737073717372737373747375737673777378737973807381738273837384738573867387738873897390739173927393739473957396739773987399740074017402740374047405740674077408740974107411741274137414741574167417741874197420742174227423742474257426742774287429743074317432743374347435743674377438743974407441744274437444744574467447744874497450745174527453745474557456745774587459746074617462746374647465746674677468746974707471747274737474747574767477747874797480748174827483748474857486748774887489749074917492749374947495749674977498749975007501750275037504750575067507750875097510751175127513751475157516751775187519752075217522752375247525752675277528752975307531753275337534753575367537753875397540754175427543754475457546754775487549755075517552755375547555755675577558755975607561756275637564756575667567756875697570757175727573757475757576757775787579758075817582758375847585758675877588758975907591759275937594759575967597759875997600760176027603760476057606760776087609761076117612761376147615761676177618761976207621762276237624762576267627762876297630763176327633763476357636763776387639764076417642764376447645764676477648764976507651765276537654765576567657765876597660766176627663766476657666766776687669767076717672767376747675767676777678767976807681768276837684768576867687768876897690769176927693769476957696769776987699770077017702770377047705770677077708770977107711771277137714771577167717771877197720772177227723772477257726772777287729773077317732773377347735773677377738773977407741774277437744774577467747774877497750775177527753775477557756775777587759776077617762776377647765776677677768776977707771777277737774777577767
77777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061806280638064806580668067806880698070807180728073807480758076807780788079808080818082808380848085808680878088808980908091809280938094809580968097809880998100810181028103810481058106810781088109811081118112811381148115811681178118811981208121812281238124812581268127812881298130813181328133813481358136813781388139814081418142814381448145814681478148814981508151815281538154815581568157815881598160816181628163816481658166816781688169817081718172817381748175817681778178817981808181818281838184818581868187818881898190819181928193819481958196819781988199820082018202820382048205820682078208820982108211821282138214821582168217821882198220822182228223822482258226822782288229823082318232823382348235823682378238823982408241824282438244824582468247824882498250825182528253825482558256825782588259826082618262826382648265826682678268826982708271827282738274827582768
27782788279828082818282828382848285828682878288828982908291829282938294829582968297829882998300830183028303830483058306830783088309831083118312831383148315831683178318831983208321832283238324832583268327832883298330833183328333833483358336833783388339834083418342834383448345834683478348834983508351835283538354835583568357835883598360836183628363836483658366836783688369837083718372837383748375837683778378837983808381838283838384838583868387838883898390839183928393839483958396839783988399840084018402840384048405840684078408840984108411841284138414841584168417841884198420842184228423842484258426842784288429843084318432843384348435843684378438843984408441844284438444844584468447844884498450845184528453845484558456845784588459846084618462846384648465846684678468846984708471847284738474847584768477847884798480848184828483848484858486848784888489849084918492849384948495849684978498849985008501850285038504850585068507850885098510851185128513851485158516851785188519852085218522852385248525852685278528852985308531853285338534853585368537853885398540854185428543854485458546854785488549855085518552855385548555855685578558855985608561856285638564856585668567856885698570857185728573857485758576857785788579858085818582858385848585858685878588858985908591859285938594859585968597859885998600860186028603860486058606860786088609861086118612861386148615861686178618861986208621862286238624862586268627862886298630863186328633863486358636863786388639864086418642864386448645864686478648864986508651865286538654865586568657865886598660866186628663866486658666866786688669867086718672867386748675867686778678867986808681868286838684868586868687868886898690869186928693869486958696869786988699870087018702870387048705870687078708870987108711871287138714871587168717871887198720872187228723872487258726872787288729873087318732873387348735873687378738873987408741874287438744874587468747874887498750875187528753875487558756875787588759876087618762876387648765876687678768876987708771877287738774877587768
77787788779878087818782878387848785878687878788878987908791879287938794879587968797879887998800880188028803880488058806880788088809881088118812881388148815881688178818881988208821882288238824882588268827882888298830883188328833883488358836883788388839884088418842884388448845884688478848884988508851885288538854885588568857885888598860886188628863886488658866886788688869887088718872887388748875887688778878887988808881888288838884888588868887888888898890889188928893889488958896889788988899890089018902890389048905890689078908890989108911891289138914891589168917891889198920892189228923892489258926892789288929893089318932893389348935893689378938893989408941894289438944894589468947894889498950895189528953895489558956895789588959896089618962896389648965896689678968896989708971897289738974897589768977897889798980898189828983898489858986898789888989899089918992899389948995899689978998899990009001900290039004900590069007900890099010901190129013901490159016901790189019902090219022902390249025902690279028902990309031903290339034903590369037903890399040904190429043904490459046904790489049905090519052905390549055905690579058905990609061906290639064906590669067906890699070907190729073907490759076907790789079908090819082908390849085908690879088908990909091909290939094909590969097909890999100910191029103910491059106910791089109911091119112911391149115911691179118911991209121912291239124912591269127912891299130913191329133913491359136913791389139914091419142914391449145914691479148914991509151915291539154915591569157915891599160916191629163916491659166916791689169917091719172917391749175917691779178917991809181918291839184918591869187918891899190919191929193919491959196919791989199920092019202920392049205920692079208920992109211921292139214921592169217921892199220922192229223922492259226922792289229923092319232923392349235923692379238923992409241924292439244924592469247924892499250925192529253925492559256925792589259926092619262926392649265926692679268926992709271927292739274927592769
27792789279928092819282928392849285928692879288928992909291929292939294929592969297929892999300930193029303930493059306930793089309931093119312931393149315931693179318931993209321932293239324932593269327932893299330933193329333933493359336933793389339934093419342934393449345934693479348934993509351935293539354935593569357935893599360936193629363936493659366936793689369937093719372937393749375937693779378937993809381938293839384938593869387938893899390939193929393939493959396939793989399940094019402940394049405940694079408940994109411941294139414941594169417941894199420942194229423942494259426942794289429943094319432943394349435943694379438943994409441944294439444944594469447944894499450945194529453945494559456945794589459946094619462946394649465946694679468946994709471947294739474947594769477947894799480948194829483948494859486948794889489949094919492949394949495949694979498949995009501950295039504950595069507950895099510951195129513951495159516951795189519952095219522952395249525952695279528952995309531953295339534953595369537953895399540954195429543954495459546954795489549955095519552955395549555955695579558955995609561956295639564956595669567956895699570957195729573957495759576957795789579958095819582958395849585958695879588958995909591959295939594959595969597959895999600960196029603960496059606960796089609961096119612961396149615961696179618961996209621962296239624962596269627962896299630963196329633963496359636963796389639964096419642964396449645964696479648964996509651965296539654965596569657965896599660966196629663966496659666966796689669967096719672967396749675967696779678967996809681968296839684968596869687968896899690969196929693969496959696969796989699970097019702970397049705970697079708970997109711971297139714971597169717971897199720972197229723972497259726972797289729973097319732973397349735973697379738973997409741974297439744974597469747974897499750975197529753975497559756975797589759976097619762976397649765976697679768976997709771977297739774977597769
77797789779978097819782978397849785978697879788978997909791979297939794979597969797979897999800980198029803980498059806980798089809981098119812981398149815981698179818981998209821982298239824982598269827982898299830983198329833983498359836983798389839984098419842984398449845984698479848984998509851985298539854985598569857985898599860986198629863986498659866986798689869987098719872987398749875987698779878987998809881988298839884988598869887988898899890989198929893989498959896989798989899990099019902990399049905990699079908990999109911991299139914991599169917991899199920992199229923992499259926992799289929993099319932993399349935993699379938993999409941994299439944994599469947994899499950995199529953995499559956995799589959996099619962996399649965996699679968996999709971997299739974997599769977997899799980998199829983998499859986998799889989999099919992999399949995999699979998999910000100011000210003100041000510006100071000810009100101001110012100131001410015100161001710018100191002010021100221002310024100251002610027100281002910030100311003210033100341003510036100371003810039100401004110042100431004410045100461004710048100491005010051100521005310054100551005610057100581005910060100611006210063100641006510066100671006810069100701007110072100731007410075100761007710078100791008010081100821008310084100851008610087100881008910090100911009210093100941009510096100971009810099101001010110102101031010410105101061010710108101091011010111101121011310114101151011610117101181011910120101211012210123101241012510126101271012810129101301013110132101331013410135101361013710138101391014010141101421014310144101451014610147101481014910150101511015210153101541015510156101571015810159101601016110162101631016410165101661016710168101691017010171101721017310174101751017610177101781017910180101811018210183101841018510186101871018810189101901019110192101931019410195101961019710198101991020010201102021020310204102051020610207102081020910210102111021210213102141021510216102171021810219102201022
11022210223102241022510226102271022810229102301023110232102331023410235102361023710238102391024010241102421024310244102451024610247102481024910250102511025210253102541025510256102571025810259102601026110262102631026410265102661026710268102691027010271102721027310274102751027610277102781027910280102811028210283102841028510286102871028810289102901029110292102931029410295102961029710298102991030010301103021030310304103051030610307103081030910310103111031210313103141031510316103171031810319103201032110322103231032410325103261032710328103291033010331103321033310334103351033610337103381033910340103411034210343103441034510346103471034810349103501035110352103531035410355103561035710358103591036010361103621036310364103651036610367103681036910370103711037210373103741037510376103771037810379103801038110382103831038410385103861038710388103891039010391103921039310394103951039610397103981039910400104011040210403104041040510406104071040810409104101041110412104131041410415104161041710418104191042010421104221042310424104251042610427104281042910430104311043210433104341043510436104371043810439104401044110442104431044410445104461044710448104491045010451104521045310454104551045610457104581045910460104611046210463104641046510466104671046810469104701047110472104731047410475104761047710478104791048010481104821048310484104851048610487104881048910490104911049210493104941049510496104971049810499105001050110502105031050410505105061050710508105091051010511105121051310514105151051610517105181051910520105211052210523105241052510526105271052810529105301053110532105331053410535105361053710538105391054010541105421054310544105451054610547105481054910550105511055210553105541055510556105571055810559105601056110562105631056410565105661056710568105691057010571105721057310574105751057610577105781057910580105811058210583105841058510586105871058810589105901059110592105931059410595105961059710598105991060010601106021060310604106051060610607106081060910610106111061210613106141061510616106171061810619106201062
11062210623106241062510626106271062810629106301063110632106331063410635106361063710638106391064010641106421064310644106451064610647106481064910650106511065210653106541065510656106571065810659106601066110662106631066410665106661066710668106691067010671106721067310674106751067610677106781067910680106811068210683106841068510686106871068810689106901069110692106931069410695106961069710698106991070010701107021070310704107051070610707107081070910710107111071210713107141071510716107171071810719107201072110722107231072410725107261072710728107291073010731107321073310734107351073610737107381073910740107411074210743107441074510746107471074810749107501075110752107531075410755107561075710758107591076010761107621076310764107651076610767107681076910770107711077210773107741077510776107771077810779107801078110782107831078410785107861078710788107891079010791107921079310794107951079610797107981079910800108011080210803108041080510806108071080810809108101081110812108131081410815108161081710818108191082010821108221082310824108251082610827108281082910830108311083210833108341083510836108371083810839108401084110842108431084410845108461084710848108491085010851108521085310854108551085610857108581085910860108611086210863108641086510866108671086810869108701087110872108731087410875108761087710878108791088010881108821088310884108851088610887108881088910890108911089210893108941089510896108971089810899109001090110902109031090410905109061090710908109091091010911109121091310914109151091610917109181091910920109211092210923109241092510926109271092810929109301093110932109331093410935109361093710938109391094010941109421094310944109451094610947109481094910950109511095210953109541095510956109571095810959109601096110962109631096410965109661096710968109691097010971109721097310974109751097610977109781097910980109811098210983109841098510986109871098810989109901099110992109931099410995109961099710998109991100011001110021100311004110051100611007110081100911010110111101211013110141101511016110171101811019110201102
11102211023110241102511026110271102811029110301103111032110331103411035110361103711038110391104011041110421104311044110451104611047110481104911050110511105211053110541105511056110571105811059110601106111062110631106411065110661106711068110691107011071110721107311074110751107611077110781107911080110811108211083110841108511086110871108811089110901109111092110931109411095110961109711098110991110011101111021110311104111051110611107111081110911110111111111211113111141111511116111171111811119111201112111122111231112411125111261112711128111291113011131111321113311134111351113611137111381113911140111411114211143111441114511146111471114811149111501115111152111531115411155111561115711158111591116011161111621116311164111651116611167111681116911170111711117211173111741117511176111771117811179111801118111182111831118411185111861118711188111891119011191111921119311194111951119611197111981119911200112011120211203112041120511206112071120811209112101121111212112131121411215112161121711218112191122011221112221122311224112251122611227112281122911230112311123211233112341123511236112371123811239112401124111242112431124411245112461124711248112491125011251112521125311254112551125611257112581125911260112611126211263112641126511266112671126811269112701127111272112731127411275112761127711278112791128011281112821128311284112851128611287112881128911290112911129211293112941129511296112971129811299113001130111302113031130411305113061130711308113091131011311113121131311314113151131611317113181131911320113211132211323113241132511326113271132811329113301133111332113331133411335113361133711338113391134011341113421134311344113451134611347113481134911350113511135211353113541135511356113571135811359113601136111362113631136411365113661136711368113691137011371113721137311374113751137611377113781137911380113811138211383113841138511386113871138811389113901139111392113931139411395113961139711398113991140011401114021140311404114051140611407114081140911410114111141211413114141141511416114171141811419114201142
11142211423114241142511426114271142811429114301143111432114331143411435114361143711438114391144011441114421144311444114451144611447114481144911450114511145211453114541145511456114571145811459114601146111462114631146411465114661146711468114691147011471114721147311474114751147611477114781147911480114811148211483114841148511486114871148811489114901149111492114931149411495114961149711498114991150011501115021150311504115051150611507115081150911510115111151211513115141151511516115171151811519115201152111522115231152411525115261152711528115291153011531115321153311534115351153611537115381153911540115411154211543115441154511546115471154811549115501155111552115531155411555115561155711558115591156011561115621156311564115651156611567115681156911570115711157211573115741157511576115771157811579115801158111582115831158411585115861158711588115891159011591115921159311594115951159611597115981159911600116011160211603116041160511606116071160811609116101161111612116131161411615116161161711618116191162011621116221162311624116251162611627116281162911630116311163211633116341163511636116371163811639116401164111642116431164411645116461164711648116491165011651116521165311654116551165611657116581165911660116611166211663116641166511666116671166811669116701167111672116731167411675116761167711678116791168011681116821168311684116851168611687116881168911690116911169211693116941169511696116971169811699117001170111702117031170411705117061170711708117091171011711117121171311714117151171611717117181171911720117211172211723117241172511726117271172811729117301173111732117331173411735117361173711738117391174011741117421174311744117451174611747117481174911750117511175211753117541175511756117571175811759117601176111762117631176411765117661176711768117691177011771117721177311774117751177611777117781177911780117811178211783117841178511786117871178811789117901179111792117931179411795117961179711798117991180011801118021180311804118051180611807118081180911810118111181211813118141181511816118171181811819118201182
11182211823118241182511826118271182811829118301183111832118331183411835118361183711838118391184011841118421184311844118451184611847118481184911850118511185211853118541185511856118571185811859118601186111862118631186411865118661186711868118691187011871118721187311874118751187611877118781187911880118811188211883118841188511886118871188811889118901189111892118931189411895118961189711898118991190011901119021190311904119051190611907119081190911910119111191211913119141191511916119171191811919119201192111922119231192411925119261192711928119291193011931119321193311934119351193611937119381193911940119411194211943119441194511946119471194811949119501195111952119531195411955119561195711958119591196011961119621196311964119651196611967119681196911970119711197211973119741197511976119771197811979119801198111982119831198411985119861198711988119891199011991119921199311994119951199611997119981199912000120011200212003120041200512006120071200812009120101201112012120131201412015120161201712018120191202012021120221202312024120251202612027120281202912030120311203212033120341203512036120371203812039120401204112042120431204412045120461204712048120491205012051120521205312054120551205612057120581205912060120611206212063120641206512066120671206812069120701207112072120731207412075120761207712078120791208012081120821208312084120851208612087120881208912090120911209212093120941209512096120971209812099121001210112102121031210412105121061210712108121091211012111121121211312114121151211612117121181211912120121211212212123121241212512126121271212812129121301213112132121331213412135121361213712138121391214012141121421214312144121451214612147121481214912150121511215212153121541215512156121571215812159121601216112162121631216412165121661216712168121691217012171121721217312174121751217612177121781217912180121811218212183121841218512186121871218812189121901219112192121931219412195121961219712198121991220012201122021220312204122051220612207122081220912210122111221212213122141221512216122171221812219122201222
11222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604126051260612607126081260912610126111261212613126141261512616126171261812619126201262
11262212623126241262512626126271262812629126301263112632126331263412635126361263712638126391264012641126421264312644126451264612647126481264912650126511265212653126541265512656126571265812659126601266112662126631266412665126661266712668126691267012671126721267312674126751267612677126781267912680126811268212683126841268512686126871268812689126901269112692126931269412695126961269712698126991270012701127021270312704127051270612707127081270912710127111271212713127141271512716127171271812719127201272112722127231272412725127261272712728127291273012731127321273312734127351273612737127381273912740127411274212743127441274512746127471274812749127501275112752127531275412755127561275712758127591276012761127621276312764127651276612767127681276912770127711277212773127741277512776127771277812779127801278112782127831278412785127861278712788127891279012791127921279312794127951279612797127981279912800128011280212803128041280512806128071280812809128101281112812128131281412815128161281712818128191282012821128221282312824128251282612827128281282912830128311283212833128341283512836128371283812839128401284112842128431284412845128461284712848128491285012851128521285312854128551285612857128581285912860128611286212863128641286512866128671286812869128701287112872128731287412875128761287712878128791288012881128821288312884128851288612887128881288912890128911289212893128941289512896128971289812899129001290112902129031290412905129061290712908129091291012911129121291312914129151291612917129181291912920129211292212923129241292512926129271292812929129301293112932129331293412935129361293712938129391294012941129421294312944129451294612947129481294912950129511295212953129541295512956129571295812959129601296112962129631296412965129661296712968129691297012971129721297312974129751297612977129781297912980129811298212983129841298512986129871298812989129901299112992129931299412995129961299712998129991300013001130021300313004130051300613007130081300913010130111301213013130141301513016130171301813019130201302
11302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158131591316013161131621316313164131651316613167131681316913170131711317213173131741317513176131771317813179131801318113182131831318413185131861318713188131891319013191131921319313194131951319613197131981319913200132011320213203132041320513206132071320813209132101321113212132131321413215132161321713218132191322013221132221322313224132251322613227132281322913230132311323213233132341323513236132371323813239132401324113242132431324413245132461324713248132491325013251132521325313254132551325613257132581325913260132611326213263132641326513266132671326813269132701327113272132731327413275132761327713278132791328013281132821328313284132851328613287132881328913290132911329213293132941329513296132971329813299133001330113302133031330413305133061330713308133091331013311133121331313314133151331613317133181331913320133211332213323133241332513326133271332813329133301333113332133331333413335133361333713338133391334013341133421334313344133451334613347133481334913350133511335213353133541335513356133571335813359133601336113362133631336413365133661336713368133691337013371133721337313374133751337613377133781337913380133811338213383133841338513386133871338813389133901339113392133931339413395133961339713398133991340013401134021340313404134051340613407134081340913410134111341213413134141341513416134171341813419134201342
11342213423134241342513426134271342813429134301343113432134331343413435134361343713438134391344013441134421344313444134451344613447134481344913450134511345213453134541345513456134571345813459134601346113462134631346413465134661346713468134691347013471134721347313474134751347613477134781347913480134811348213483134841348513486134871348813489134901349113492134931349413495134961349713498134991350013501135021350313504135051350613507135081350913510135111351213513135141351513516135171351813519135201352113522135231352413525135261352713528135291353013531135321353313534135351353613537135381353913540135411354213543135441354513546135471354813549135501355113552135531355413555135561355713558135591356013561135621356313564135651356613567135681356913570135711357213573135741357513576135771357813579135801358113582135831358413585135861358713588135891359013591135921359313594135951359613597135981359913600136011360213603136041360513606136071360813609136101361113612136131361413615136161361713618136191362013621136221362313624136251362613627136281362913630136311363213633136341363513636136371363813639136401364113642136431364413645136461364713648136491365013651136521365313654136551365613657136581365913660136611366213663136641366513666136671366813669136701367113672136731367413675136761367713678136791368013681136821368313684136851368613687136881368913690136911369213693136941369513696136971369813699137001370113702137031370413705137061370713708137091371013711137121371313714137151371613717137181371913720137211372213723137241372513726137271372813729137301373113732137331373413735137361373713738137391374013741137421374313744137451374613747137481374913750137511375213753137541375513756137571375813759137601376113762137631376413765137661376713768137691377013771137721377313774137751377613777137781377913780137811378213783137841378513786137871378813789137901379113792137931379413795137961379713798137991380013801138021380313804138051380613807138081380913810138111381213813138141381513816138171381813819138201382
11382213823138241382513826138271382813829138301383113832138331383413835138361383713838138391384013841138421384313844138451384613847138481384913850138511385213853138541385513856138571385813859138601386113862138631386413865138661386713868138691387013871138721387313874138751387613877138781387913880138811388213883138841388513886138871388813889138901389113892138931389413895138961389713898138991390013901139021390313904139051390613907139081390913910139111391213913139141391513916139171391813919139201392113922139231392413925139261392713928139291393013931139321393313934139351393613937139381393913940139411394213943139441394513946139471394813949139501395113952139531395413955139561395713958139591396013961139621396313964139651396613967139681396913970139711397213973139741397513976139771397813979139801398113982139831398413985139861398713988139891399013991139921399313994139951399613997139981399914000140011400214003140041400514006140071400814009140101401114012140131401414015140161401714018140191402014021140221402314024140251402614027140281402914030140311403214033140341403514036140371403814039140401404114042140431404414045140461404714048140491405014051140521405314054140551405614057140581405914060140611406214063140641406514066140671406814069140701407114072140731407414075140761407714078140791408014081140821408314084140851408614087140881408914090140911409214093140941409514096140971409814099141001410114102141031410414105141061410714108141091411014111141121411314114141151411614117141181411914120141211412214123141241412514126141271412814129141301413114132141331413414135141361413714138141391414014141141421414314144141451414614147141481414914150141511415214153141541415514156141571415814159141601416114162141631416414165141661416714168141691417014171141721417314174141751417614177141781417914180141811418214183141841418514186141871418814189141901419114192141931419414195141961419714198141991420014201142021420314204142051420614207142081420914210142111421214213142141421514216142171421814219142201422
11422214223142241422514226142271422814229142301423114232142331423414235142361423714238142391424014241142421424314244142451424614247142481424914250142511425214253142541425514256142571425814259142601426114262142631426414265142661426714268142691427014271142721427314274142751427614277142781427914280142811428214283142841428514286142871428814289142901429114292142931429414295142961429714298142991430014301143021430314304143051430614307143081430914310143111431214313143141431514316143171431814319143201432114322143231432414325143261432714328143291433014331143321433314334143351433614337143381433914340143411434214343143441434514346143471434814349143501435114352143531435414355143561435714358143591436014361143621436314364143651436614367143681436914370143711437214373143741437514376143771437814379143801438114382143831438414385143861438714388143891439014391143921439314394143951439614397143981439914400144011440214403144041440514406144071440814409144101441114412144131441414415144161441714418144191442014421144221442314424144251442614427144281442914430144311443214433144341443514436144371443814439144401444114442144431444414445144461444714448144491445014451144521445314454144551445614457144581445914460144611446214463144641446514466144671446814469144701447114472144731447414475144761447714478144791448014481144821448314484144851448614487144881448914490144911449214493144941449514496144971449814499145001450114502145031450414505145061450714508145091451014511145121451314514145151451614517145181451914520145211452214523145241452514526145271452814529145301453114532145331453414535145361453714538145391454014541145421454314544145451454614547145481454914550145511455214553145541455514556145571455814559145601456114562145631456414565145661456714568145691457014571145721457314574145751457614577145781457914580145811458214583145841458514586145871458814589145901459114592145931459414595145961459714598145991460014601146021460314604146051460614607146081460914610146111461214613146141461514616146171461814619146201462
11462214623146241462514626146271462814629146301463114632146331463414635146361463714638146391464014641146421464314644146451464614647146481464914650146511465214653146541465514656146571465814659146601466114662146631466414665146661466714668146691467014671146721467314674146751467614677146781467914680146811468214683146841468514686146871468814689146901469114692146931469414695146961469714698146991470014701147021470314704147051470614707147081470914710147111471214713147141471514716147171471814719147201472114722147231472414725147261472714728147291473014731147321473314734147351473614737147381473914740147411474214743147441474514746147471474814749147501475114752147531475414755147561475714758147591476014761147621476314764147651476614767147681476914770147711477214773147741477514776147771477814779147801478114782147831478414785147861478714788147891479014791147921479314794147951479614797147981479914800148011480214803148041480514806148071480814809148101481114812148131481414815148161481714818148191482014821148221482314824148251482614827148281482914830148311483214833148341483514836148371483814839148401484114842148431484414845148461484714848148491485014851148521485314854148551485614857148581485914860148611486214863148641486514866148671486814869148701487114872148731487414875148761487714878148791488014881148821488314884148851488614887148881488914890148911489214893148941489514896148971489814899149001490114902149031490414905149061490714908149091491014911149121491314914149151491614917149181491914920149211492214923149241492514926149271492814929149301493114932149331493414935149361493714938149391494014941149421494314944149451494614947149481494914950149511495214953149541495514956149571495814959149601496114962149631496414965149661496714968149691497014971149721497314974149751497614977149781497914980149811498214983149841498514986149871498814989149901499114992149931499414995149961499714998149991500015001150021500315004150051500615007150081500915010150111501215013150141501515016150171501815019150201502
11502215023150241502515026150271502815029150301503115032150331503415035150361503715038150391504015041150421504315044150451504615047150481504915050150511505215053150541505515056150571505815059150601506115062150631506415065150661506715068150691507015071150721507315074150751507615077150781507915080150811508215083150841508515086150871508815089150901509115092150931509415095150961509715098150991510015101151021510315104151051510615107151081510915110151111511215113151141511515116151171511815119151201512115122151231512415125151261512715128151291513015131151321513315134151351513615137151381513915140151411514215143 |
- #pragma clang diagnostic ignored "-Weverything"
-
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/out/ubuntu-22.04/22.04/build/hip-on-rocclr/hipamd/src/hiprtc/hip_rtc_gen/hipRTC_header.h"
- # 1 "<built-in>" 1
- # 1 "<built-in>" 3
- # 845 "<built-in>" 3
- # 1 "<command line>" 1
- # 1 "<built-in>" 2
- # 1 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 1 3
- # 33 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 3
- extern "C" {
- __attribute__((__visibility__("default")))
- __attribute__((weak))
- __attribute__((noreturn))
- __attribute__((device)) void __cxa_pure_virtual(void) {
- __builtin_trap();
- }
- __attribute__((__visibility__("default")))
- __attribute__((weak))
- __attribute__((noreturn))
- __attribute__((device)) void __cxa_deleted_virtual(void) {
- __builtin_trap();
- }
- }
- # 57 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 3
- typedef long unsigned int size_t;
- # 74 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 3
- typedef long unsigned int __hip_size_t;
- extern "C" {
- extern "C" __attribute__((device)) unsigned long long __ockl_dm_alloc(unsigned long long __size);
- extern "C" __attribute__((device)) void __ockl_dm_dealloc(unsigned long long __addr);
- # 95 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 3
- __attribute__((weak)) inline __attribute__((device)) void *malloc(__hip_size_t __size) {
- return (void *) __ockl_dm_alloc(__size);
- }
- __attribute__((weak)) inline __attribute__((device)) void free(void *__ptr) {
- __ockl_dm_dealloc((unsigned long long)__ptr);
- }
- # 124 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 3
- }
- # 1 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_libdevice_declares.h" 1 3
- # 14 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_libdevice_declares.h" 3
- extern "C" {
- __attribute__((device)) __attribute__((const)) float __ocml_acos_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_acosh_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_asin_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_asinh_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_atan2_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_atan_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_atanh_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_cbrt_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_ceil_f32(float);
- __attribute__((device)) __attribute__((const)) __attribute__((device)) float __ocml_copysign_f32(float,
- float);
- __attribute__((device)) float __ocml_cos_f32(float);
- __attribute__((device)) float __ocml_native_cos_f32(float);
- __attribute__((device)) __attribute__((pure)) __attribute__((device)) float __ocml_cosh_f32(float);
- __attribute__((device)) float __ocml_cospi_f32(float);
- __attribute__((device)) float __ocml_i0_f32(float);
- __attribute__((device)) float __ocml_i1_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_erfc_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_erfcinv_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_erfcx_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_erf_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_erfinv_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_exp10_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_native_exp10_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_exp2_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_exp_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_native_exp_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_expm1_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_fabs_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_fdim_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_floor_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_fma_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_fmax_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_fmin_f32(float, float);
- __attribute__((device)) __attribute__((const)) __attribute__((device)) float __ocml_fmod_f32(float,
- float);
- __attribute__((device)) float __ocml_frexp_f32(float,
- __attribute__((address_space(5))) int *);
- __attribute__((device)) __attribute__((const)) float __ocml_hypot_f32(float, float);
- __attribute__((device)) __attribute__((const)) int __ocml_ilogb_f32(float);
- __attribute__((device)) __attribute__((const)) int __ocml_isfinite_f32(float);
- __attribute__((device)) __attribute__((const)) int __ocml_isinf_f32(float);
- __attribute__((device)) __attribute__((const)) int __ocml_isnan_f32(float);
- __attribute__((device)) float __ocml_j0_f32(float);
- __attribute__((device)) float __ocml_j1_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_ldexp_f32(float, int);
- __attribute__((device)) float __ocml_lgamma_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_log10_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_native_log10_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_log1p_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_log2_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_native_log2_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_logb_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_log_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_native_log_f32(float);
- __attribute__((device)) float __ocml_modf_f32(float,
- __attribute__((address_space(5))) float *);
- __attribute__((device)) __attribute__((const)) float __ocml_nearbyint_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_nextafter_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_len3_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_len4_f32(float, float, float,
- float);
- __attribute__((device)) __attribute__((pure)) float __ocml_ncdf_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_ncdfinv_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_pow_f32(float, float);
- __attribute__((device)) __attribute__((pure)) float __ocml_pown_f32(float, int);
- __attribute__((device)) __attribute__((pure)) float __ocml_rcbrt_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_remainder_f32(float, float);
- __attribute__((device)) float __ocml_remquo_f32(float, float,
- __attribute__((address_space(5))) int *);
- __attribute__((device)) __attribute__((const)) float __ocml_rhypot_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_rint_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_rlen3_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_rlen4_f32(float, float, float,
- float);
- __attribute__((device)) __attribute__((const)) float __ocml_round_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_rsqrt_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_scalb_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_scalbn_f32(float, int);
- __attribute__((device)) __attribute__((const)) int __ocml_signbit_f32(float);
- __attribute__((device)) float __ocml_sincos_f32(float,
- __attribute__((address_space(5))) float *);
- __attribute__((device)) float __ocml_sincospi_f32(float,
- __attribute__((address_space(5))) float *);
- __attribute__((device)) float __ocml_sin_f32(float);
- __attribute__((device)) float __ocml_native_sin_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_sinh_f32(float);
- __attribute__((device)) float __ocml_sinpi_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_sqrt_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_native_sqrt_f32(float);
- __attribute__((device)) float __ocml_tan_f32(float);
- __attribute__((device)) __attribute__((pure)) float __ocml_tanh_f32(float);
- __attribute__((device)) float __ocml_tgamma_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_trunc_f32(float);
- __attribute__((device)) float __ocml_y0_f32(float);
- __attribute__((device)) float __ocml_y1_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_add_rte_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_add_rtn_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_add_rtp_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_add_rtz_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_sub_rte_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_sub_rtn_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_sub_rtp_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_sub_rtz_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_mul_rte_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_mul_rtn_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_mul_rtp_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_mul_rtz_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_div_rte_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_div_rtn_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_div_rtp_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_div_rtz_f32(float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_sqrt_rte_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_sqrt_rtn_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_sqrt_rtp_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_sqrt_rtz_f32(float);
- __attribute__((device)) __attribute__((const)) float __ocml_fma_rte_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float);
- __attribute__((device)) __attribute__((const)) float __ocml_fma_rtz_f32(float, float, float);
- __attribute__((device)) inline __attribute__((const)) float
- __llvm_amdgcn_cos_f32(float __x) {
- return __builtin_amdgcn_cosf(__x);
- }
- __attribute__((device)) inline __attribute__((const)) float
- __llvm_amdgcn_rcp_f32(float __x) {
- return __builtin_amdgcn_rcpf(__x);
- }
- __attribute__((device)) inline __attribute__((const)) float
- __llvm_amdgcn_rsq_f32(float __x) {
- return __builtin_amdgcn_rsqf(__x);
- }
- __attribute__((device)) inline __attribute__((const)) float
- __llvm_amdgcn_sin_f32(float __x) {
- return __builtin_amdgcn_sinf(__x);
- }
- __attribute__((device)) __attribute__((const)) double __ocml_acos_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_acosh_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_asin_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_asinh_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_atan2_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_atan_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_atanh_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_cbrt_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_ceil_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_copysign_f64(double, double);
- __attribute__((device)) double __ocml_cos_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_cosh_f64(double);
- __attribute__((device)) double __ocml_cospi_f64(double);
- __attribute__((device)) double __ocml_i0_f64(double);
- __attribute__((device)) double __ocml_i1_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_erfc_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_erfcinv_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_erfcx_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_erf_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_erfinv_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_exp10_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_exp2_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_exp_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_expm1_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_fabs_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_fdim_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_floor_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_fma_f64(double, double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_fmax_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_fmin_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_fmod_f64(double, double);
- __attribute__((device)) double __ocml_frexp_f64(double,
- __attribute__((address_space(5))) int *);
- __attribute__((device)) __attribute__((const)) double __ocml_hypot_f64(double, double);
- __attribute__((device)) __attribute__((const)) int __ocml_ilogb_f64(double);
- __attribute__((device)) __attribute__((const)) int __ocml_isfinite_f64(double);
- __attribute__((device)) __attribute__((const)) int __ocml_isinf_f64(double);
- __attribute__((device)) __attribute__((const)) int __ocml_isnan_f64(double);
- __attribute__((device)) double __ocml_j0_f64(double);
- __attribute__((device)) double __ocml_j1_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_ldexp_f64(double, int);
- __attribute__((device)) double __ocml_lgamma_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_log10_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_log1p_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_log2_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_logb_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_log_f64(double);
- __attribute__((device)) double __ocml_modf_f64(double,
- __attribute__((address_space(5))) double *);
- __attribute__((device)) __attribute__((const)) double __ocml_nearbyint_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_nextafter_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_len3_f64(double, double,
- double);
- __attribute__((device)) __attribute__((const)) double __ocml_len4_f64(double, double, double,
- double);
- __attribute__((device)) __attribute__((pure)) double __ocml_ncdf_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_ncdfinv_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_pow_f64(double, double);
- __attribute__((device)) __attribute__((pure)) double __ocml_pown_f64(double, int);
- __attribute__((device)) __attribute__((pure)) double __ocml_rcbrt_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_remainder_f64(double, double);
- __attribute__((device)) double __ocml_remquo_f64(double, double,
- __attribute__((address_space(5))) int *);
- __attribute__((device)) __attribute__((const)) double __ocml_rhypot_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_rint_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_rlen3_f64(double, double,
- double);
- __attribute__((device)) __attribute__((const)) double __ocml_rlen4_f64(double, double,
- double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_round_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_rsqrt_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_scalb_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_scalbn_f64(double, int);
- __attribute__((device)) __attribute__((const)) int __ocml_signbit_f64(double);
- __attribute__((device)) double __ocml_sincos_f64(double,
- __attribute__((address_space(5))) double *);
- __attribute__((device)) double
- __ocml_sincospi_f64(double, __attribute__((address_space(5))) double *);
- __attribute__((device)) double __ocml_sin_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_sinh_f64(double);
- __attribute__((device)) double __ocml_sinpi_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_sqrt_f64(double);
- __attribute__((device)) double __ocml_tan_f64(double);
- __attribute__((device)) __attribute__((pure)) double __ocml_tanh_f64(double);
- __attribute__((device)) double __ocml_tgamma_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_trunc_f64(double);
- __attribute__((device)) double __ocml_y0_f64(double);
- __attribute__((device)) double __ocml_y1_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_add_rte_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_add_rtn_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_add_rtp_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_add_rtz_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_sub_rte_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_sub_rtn_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_sub_rtp_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_sub_rtz_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_mul_rte_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_mul_rtn_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_mul_rtp_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_mul_rtz_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_div_rte_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_div_rtn_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_div_rtp_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_div_rtz_f64(double, double);
- __attribute__((device)) __attribute__((const)) double __ocml_sqrt_rte_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_sqrt_rtn_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_sqrt_rtp_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_sqrt_rtz_f64(double);
- __attribute__((device)) __attribute__((const)) double __ocml_fma_rte_f64(double, double,
- double);
- __attribute__((device)) __attribute__((const)) double __ocml_fma_rtn_f64(double, double,
- double);
- __attribute__((device)) __attribute__((const)) double __ocml_fma_rtp_f64(double, double,
- double);
- __attribute__((device)) __attribute__((const)) double __ocml_fma_rtz_f64(double, double,
- double);
- __attribute__((device)) inline __attribute__((const)) double
- __llvm_amdgcn_rcp_f64(double __x) {
- return __builtin_amdgcn_rcp(__x);
- }
- __attribute__((device)) inline __attribute__((const)) double
- __llvm_amdgcn_rsq_f64(double __x) {
- return __builtin_amdgcn_rsq(__x);
- }
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
- __attribute__((device)) _Float16 __ocml_cos_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_exp_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_exp10_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_exp2_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_floor_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16,
- _Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_fmax_f16(_Float16, _Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_fmin_f16(_Float16, _Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16);
- __attribute__((device)) __attribute__((const)) int __ocml_isinf_f16(_Float16);
- __attribute__((device)) __attribute__((const)) int __ocml_isnan_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_log_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_log10_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_log2_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __llvm_amdgcn_rcp_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_rint_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16);
- __attribute__((device)) _Float16 __ocml_sin_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16);
- __attribute__((device)) __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16);
- __attribute__((device)) __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int);
- typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
- typedef short __2i16 __attribute__((ext_vector_type(2)));
- __attribute__((device)) __attribute__((const)) float __ockl_fdot2(__2f16 a, __2f16 b,
- float c, bool s);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16);
- __attribute__((device)) __2f16 __ocml_cos_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_exp2_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_floor_2f16(__2f16);
- __attribute__((device)) __attribute__((const))
- __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
- __attribute__((device)) __attribute__((const)) __2i16 __ocml_isinf_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2i16 __ocml_isnan_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_log_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_log10_2f16(__2f16);
- __attribute__((device)) __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16);
- __attribute__((device)) inline __2f16
- __llvm_amdgcn_rcp_2f16(__2f16 __x)
- {
- return (__2f16)(__llvm_amdgcn_rcp_f16(__x.x), __llvm_amdgcn_rcp_f16(__x.y));
- }
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16);
- __attribute__((device)) __2f16 __ocml_sin_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16);
- __attribute__((device)) __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16);
- }
- # 128 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 2 3
- # 1 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 1 3
- # 94 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline))
- long unsigned int __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
- long unsigned int __r = 0;
- while (*__tagp != '\0') {
- char __tmp = *__tagp;
- if (__tmp >= '0' && __tmp <= '7')
- __r = (__r * 8u) + __tmp - '0';
- else
- return 0;
- ++__tagp;
- }
- return __r;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- long unsigned int __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
- long unsigned int __r = 0;
- while (*__tagp != '\0') {
- char __tmp = *__tagp;
- if (__tmp >= '0' && __tmp <= '9')
- __r = (__r * 10u) + __tmp - '0';
- else
- return 0;
- ++__tagp;
- }
- return __r;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- long unsigned int __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
- long unsigned int __r = 0;
- while (*__tagp != '\0') {
- char __tmp = *__tagp;
- if (__tmp >= '0' && __tmp <= '9')
- __r = (__r * 16u) + __tmp - '0';
- else if (__tmp >= 'a' && __tmp <= 'f')
- __r = (__r * 16u) + __tmp - 'a' + 10;
- else if (__tmp >= 'A' && __tmp <= 'F')
- __r = (__r * 16u) + __tmp - 'A' + 10;
- else
- return 0;
- ++__tagp;
- }
- return __r;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- long unsigned int __make_mantissa(const char *__tagp __attribute__((nonnull))) {
- if (*__tagp == '0') {
- ++__tagp;
- if (*__tagp == 'x' || *__tagp == 'X')
- return __make_mantissa_base16(__tagp);
- else
- return __make_mantissa_base8(__tagp);
- }
- return __make_mantissa_base10(__tagp);
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- int abs(int __x) {
- int __sgn = __x >> (sizeof(int) * 8 - 1);
- return (__x ^ __sgn) - __sgn;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- long labs(long __x) {
- long __sgn = __x >> (sizeof(long) * 8 - 1);
- return (__x ^ __sgn) - __sgn;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- long long llabs(long long __x) {
- long long __sgn = __x >> (sizeof(long long) * 8 - 1);
- return (__x ^ __sgn) - __sgn;
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- float acosf(float __x) { return __ocml_acos_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float acoshf(float __x) { return __ocml_acosh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float asinf(float __x) { return __ocml_asin_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float asinhf(float __x) { return __ocml_asinh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float atanf(float __x) { return __ocml_atan_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float atanhf(float __x) { return __ocml_atanh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float ceilf(float __x) { return __ocml_ceil_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float cosf(float __x) { return __ocml_cos_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float coshf(float __x) { return __ocml_cosh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float cospif(float __x) { return __ocml_cospi_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float erfcf(float __x) { return __ocml_erfc_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float erff(float __x) { return __ocml_erf_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float exp10f(float __x) { return __ocml_exp10_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float exp2f(float __x) { return __ocml_exp2_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float expf(float __x) { return __ocml_exp_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float expm1f(float __x) { return __ocml_expm1_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fabsf(float __x) { return __builtin_fabsf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fdividef(float __x, float __y) { return __x / __y; }
- static __attribute__((device)) inline __attribute__((always_inline))
- float floorf(float __x) { return __ocml_floor_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fmaf(float __x, float __y, float __z) {
- return __ocml_fma_f32(__x, __y, __z);
- }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
- // Calls __ocml_frexp_f32 on __x, returns its float result and stores the
- // integer it produces through __nptr.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- frexpf(float __x, int *__nptr) {
-   // The OCML routine takes an address_space(5) pointer, so the integer is
-   // captured in a local first and then copied to the caller's pointer.
-   int __exp_out;
-   const float __frac = __ocml_frexp_f32(
-       __x, (__attribute__((address_space(5))) int *)&__exp_out);
-   *__nptr = __exp_out;
-   return __frac;
- }
- // Single-precision wrappers (hypotf .. j1f). Each forwards to the OCML
- // routine named in its body; the classification helpers return bool.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) int
- ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __finitef(float __x) { return __ocml_isfinite_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __isinff(float __x) { return __ocml_isinf_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __isnanf(float __x) { return __ocml_isnan_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- j0f(float __x) { return __ocml_j0_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- j1f(float __x) { return __ocml_j1_f32(__x); }
- // Order-__n value built from j0f/j1f.
- //
- // Orders 0 and 1 come directly from j0f/j1f. Higher orders use the three-term
- // recurrence f_{k+1}(x) = (2k / x) * f_k(x) - f_{k-1}(x).
- // Negative orders are reflected with J_{-n}(x) = (-1)^n * J_n(x). Before this
- // change a negative __n skipped the loop and returned j1f(__x) unchanged.
- // NOTE(review): forward recurrence is known to lose accuracy when __n is large
- // relative to |__x|; that limitation is unchanged here.
- static __attribute__((device)) inline __attribute__((always_inline))
- float jnf(int __n, float __x) {
-   // Take |__n| in unsigned arithmetic so INT_MIN needs no special case.
-   const unsigned int __order =
-       (__n < 0) ? 0u - (unsigned int)__n : (unsigned int)__n;
-   // Only odd negative orders change sign.
-   const bool __flip = (__n < 0) && ((__order & 1u) != 0u);
-   float __r;
-   if (__order == 0u) {
-     __r = j0f(__x);
-   } else if (__order == 1u) {
-     __r = j1f(__x);
-   } else {
-     float __x0 = j0f(__x);
-     float __x1 = j1f(__x);
-     for (unsigned int __i = 1u; __i < __order; ++__i) {
-       float __x2 = (2 * __i) / __x * __x1 - __x0;
-       __x0 = __x1;
-       __x1 = __x2;
-     }
-     __r = __x1;
-   }
-   return __flip ? -__r : __r;
- }
- // Single-precision wrappers (ldexpf .. lroundf). Each forwards to the OCML
- // routine named in its body.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
- // llrintf/llroundf: the float result of rint/round is converted implicitly
- // to the integer return type.
- static __attribute__((device)) inline __attribute__((always_inline)) long long int
- llrintf(float __x) { return __ocml_rint_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long long int
- llroundf(float __x) { return __ocml_round_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- log10f(float __x) { return __ocml_log10_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- log1pf(float __x) { return __ocml_log1p_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- log2f(float __x) { return __ocml_log2_f32(__x); }
- // log2fi: the int argument is cast to float before the call.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- log2fi(int __x) { return __ocml_log2_f32((float) __x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- logbf(float __x) { return __ocml_logb_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- logf(float __x) { return __ocml_log_f32(__x); }
- // lrintf/lroundf: same implicit float-to-integer conversion, to long int.
- static __attribute__((device)) inline __attribute__((always_inline)) long int
- lrintf(float __x) { return __ocml_rint_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long int
- lroundf(float __x) { return __ocml_round_f32(__x); }
- // Calls __ocml_modf_f32 on __x, returns its float result and stores the
- // second value it produces through __iptr.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- modff(float __x, float *__iptr) {
-   // The OCML routine writes through an address_space(5) pointer, so a local
-   // receives the value before it is copied out.
-   float __second;
-   const float __first = __ocml_modf_f32(
-       __x, (__attribute__((address_space(5))) float *)&__second);
-   *__iptr = __second;
-   return __first;
- }
- // Assembles a float field by field: sign 0, all exponent bits set, quiet bit
- // set, and the remaining bits taken from __make_mantissa(__tagp).
- static __attribute__((device)) inline __attribute__((always_inline)) float
- nanf(const char *__tagp __attribute__((nonnull))) {
-   // Bit-field view of a float; widths in declaration order are 22/1/8/1.
-   union {
-     float val;
-     struct ieee_float {
-       unsigned int mantissa : 22;
-       unsigned int quiet : 1;
-       unsigned int exponent : 8;
-       unsigned int sign : 1;
-     } bits;
-   } __pun;
-   // The bit-field struct must overlay the float exactly.
-   static_assert((sizeof(__pun.bits)) == (sizeof(__pun.val)), "");
-   // The four fields are disjoint, so the order of the stores is irrelevant.
-   __pun.bits.mantissa = __make_mantissa(__tagp);  // kept to 22 bits
-   __pun.bits.quiet = 1u;
-   __pun.bits.exponent = ~0u;  // every exponent bit set
-   __pun.bits.sign = 0u;
-   return __pun.val;
- }
- // Single-precision wrappers (nearbyintf .. normcdfinvf). Each forwards to
- // the OCML routine named in its body; norm3df/norm4df map to len3/len4 and
- // normcdff/normcdfinvf map to ncdf/ncdfinv.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- nextafterf(float __x, float __y) { return __ocml_nextafter_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- norm3df(float __x, float __y, float __z) { return __ocml_len3_f32(__x, __y, __z); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- norm4df(float __x, float __y, float __z, float __w) {
-   return __ocml_len4_f32(__x, __y, __z, __w);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- normcdff(float __x) { return __ocml_ncdf_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
- // Sums the squares of the first __dim floats at __a and returns
- // __ocml_sqrt_f32 of that sum.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- normf(int __dim, const float *__a) {
-   float __sum_sq = 0;
-   // Count __dim down to zero while walking the pointer forward.
-   for (; __dim != 0; --__dim, ++__a) {
-     __sum_sq += __a[0] * __a[0];
-   }
-   return __ocml_sqrt_f32(__sum_sq);
- }
- // powf forwards to __ocml_pow_f32; powif (int exponent) forwards to
- // __ocml_pown_f32.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
- // Integer power by repeated squaring. A negative exponent returns -1.
- static __attribute__((device)) inline __attribute__((always_inline)) int
- powii(int __base, int __exp) {
-   if (__exp < 0)
-     return -1;
-   // Consume the lowest exponent bit up front, then square the base once for
-   // every remaining bit. The base is never squared after the last bit.
-   int __acc = (__exp & 1) ? __base : 1;
-   for (__exp >>= 1; __exp != 0; __exp >>= 1) {
-     __base *= __base;
-     if (__exp & 1)
-       __acc *= __base;
-   }
-   return __acc;
- }
- // rcbrtf and remainderf forward to the OCML routines named in their bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- remainderf(float __x, float __y) { return __ocml_remainder_f32(__x, __y); }
- // Calls __ocml_remquo_f32 on (__x, __y), returns its float result and stores
- // the integer it produces through __quo.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- remquof(float __x, float __y, int *__quo) {
-   // The OCML routine writes through an address_space(5) pointer, so a local
-   // receives the integer before it is copied out.
-   int __quo_out;
-   const float __rem = __ocml_remquo_f32(
-       __x, __y, (__attribute__((address_space(5))) int *)&__quo_out);
-   *__quo = __quo_out;
-   return __rem;
- }
- // Single-precision wrappers (rhypotf .. rnorm4df). Each forwards to the OCML
- // routine named in its body; rnorm3df/rnorm4df map to rlen3/rlen4.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rintf(float __x) { return __ocml_rint_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rnorm3df(float __x, float __y, float __z) { return __ocml_rlen3_f32(__x, __y, __z); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rnorm4df(float __x, float __y, float __z, float __w) {
-   return __ocml_rlen4_f32(__x, __y, __z, __w);
- }
- // Sums the squares of the first __dim floats at __a and returns
- // __ocml_rsqrt_f32 of that sum.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rnormf(int __dim, const float *__a) {
-   float __sum_sq = 0;
-   // Count __dim down to zero while walking the pointer forward.
-   for (; __dim != 0; --__dim, ++__a) {
-     __sum_sq += __a[0] * __a[0];
-   }
-   return __ocml_rsqrt_f32(__sum_sq);
- }
- // roundf and rsqrtf forward to the OCML routines named in their bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- roundf(float __x) { return __ocml_round_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
- // scalbn for a `long int` exponent.
- //
- // Exponents that fit in int use __ocml_scalbn_f32; anything outside that
- // range goes to __ocml_scalb_f32 instead. The previous guard compared __n
- // against 9223372036854775807L, so almost every out-of-int-range exponent was
- // narrowed on its way into the int-exponent call (e.g. 2^32 became 0).
- // NOTE(review): assumes __ocml_scalbn_f32 takes an int exponent, as scalbnf's
- // call suggests; confirm against the libdevice declarations.
- static __attribute__((device)) inline __attribute__((always_inline))
- float scalblnf(float __x, long int __n) {
-   const bool __fits_int = (__n >= -2147483647L - 1L) && (__n <= 2147483647L);
-   return __fits_int ? __ocml_scalbn_f32(__x, (int)__n)
-                     : __ocml_scalb_f32(__x, __n);
- }
- // scalbnf and __signbitf forward to the OCML routines named in their bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __signbitf(float __x) { return __ocml_signbit_f32(__x); }
- // One __ocml_sincos_f32 call fills both outputs: its return value goes to
- // *__sinptr and the value it writes through its pointer goes to *__cosptr.
- static __attribute__((device)) inline __attribute__((always_inline)) void
- sincosf(float __x, float *__sinptr, float *__cosptr) {
-   // The OCML routine writes through an address_space(5) pointer, hence the
-   // local staging variable.
-   float __cos_out;
-   const float __sin_out = __ocml_sincos_f32(
-       __x, (__attribute__((address_space(5))) float *)&__cos_out);
-   // Store order kept as before: sine first, then cosine.
-   *__sinptr = __sin_out;
-   *__cosptr = __cos_out;
- }
- // One __ocml_sincospi_f32 call fills both outputs: its return value goes to
- // *__sinptr and the value it writes through its pointer goes to *__cosptr.
- static __attribute__((device)) inline __attribute__((always_inline)) void
- sincospif(float __x, float *__sinptr, float *__cosptr) {
-   // The OCML routine writes through an address_space(5) pointer, hence the
-   // local staging variable.
-   float __cos_out;
-   const float __sin_out = __ocml_sincospi_f32(
-       __x, (__attribute__((address_space(5))) float *)&__cos_out);
-   // Store order kept as before: sine first, then cosine.
-   *__sinptr = __sin_out;
-   *__cosptr = __cos_out;
- }
- // Single-precision wrappers (sinf .. y1f). Each forwards its argument to the
- // OCML routine named in its body.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- sinf(float __x) { return __ocml_sin_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- sinhf(float __x) { return __ocml_sinh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- sinpif(float __x) { return __ocml_sinpi_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- tanf(float __x) { return __ocml_tan_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- tanhf(float __x) { return __ocml_tanh_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- truncf(float __x) { return __ocml_trunc_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- y0f(float __x) { return __ocml_y0_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- y1f(float __x) { return __ocml_y1_f32(__x); }
- // Order-__n value built from y0f/y1f.
- //
- // Orders 0 and 1 come directly from y0f/y1f. Higher orders use the three-term
- // recurrence f_{k+1}(x) = (2k / x) * f_k(x) - f_{k-1}(x).
- // Negative orders are reflected with Y_{-n}(x) = (-1)^n * Y_n(x). Before this
- // change a negative __n skipped the loop and returned y1f(__x) unchanged.
- static __attribute__((device)) inline __attribute__((always_inline))
- float ynf(int __n, float __x) {
-   // Take |__n| in unsigned arithmetic so INT_MIN needs no special case.
-   const unsigned int __order =
-       (__n < 0) ? 0u - (unsigned int)__n : (unsigned int)__n;
-   // Only odd negative orders change sign.
-   const bool __flip = (__n < 0) && ((__order & 1u) != 0u);
-   float __r;
-   if (__order == 0u) {
-     __r = y0f(__x);
-   } else if (__order == 1u) {
-     __r = y1f(__x);
-   } else {
-     float __x0 = y0f(__x);
-     float __x1 = y1f(__x);
-     for (unsigned int __i = 1u; __i < __order; ++__i) {
-       float __x2 = (2 * __i) / __x * __x1 - __x0;
-       __x0 = __x1;
-       __x1 = __x2;
-     }
-     __r = __x1;
-   }
-   return __flip ? -__r : __r;
- }
- // Double-underscore variants that forward to the __ocml_native_* routines.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __cosf(float __x) { return __ocml_native_cos_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __expf(float __x) { return __ocml_native_exp_f32(__x); }
- # 627 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- // Single-precision intrinsic-style helpers (__fadd_rn .. __powf). The
- // arithmetic ones are plain C++ operators on float; the rest forward to the
- // routine named in their body. Preprocessor line markers are kept in place.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fadd_rn(float __x, float __y) { return __x + __y; }
- # 641 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fdiv_rn(float __x, float __y) { return __x / __y; }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fdividef(float __x, float __y) { return __x / __y; }
- # 666 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fmaf_rn(float __x, float __y, float __z) { return __ocml_fma_f32(__x, __y, __z); }
- # 682 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fmul_rn(float __x, float __y) { return __x * __y; }
- # 696 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __frcp_rn(float __x) { return 1.0f / __x; }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
- # 713 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
- # 727 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __fsub_rn(float __x, float __y) { return __x - __y; }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __log10f(float __x) { return __ocml_native_log10_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __log2f(float __x) { return __ocml_native_log2_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __logf(float __x) { return __ocml_native_log_f32(__x); }
- // __powf uses the regular __ocml_pow_f32, not a native variant.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
- // Clamps __x: values below 0 give 0, values above 1 give 1, and anything
- // else (including a value for which both comparisons are false) is returned
- // unchanged.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __saturatef(float __x) {
-   if (__x < 0)
-     return 0;
-   if (__x > 1)
-     return 1;
-   return __x;
- }
- // Fills *__sinptr and *__cosptr from two separate native OCML calls.
- static __attribute__((device)) inline __attribute__((always_inline)) void
- __sincosf(float __x, float *__sinptr, float *__cosptr) {
-   const float __sin_out = __ocml_native_sin_f32(__x);
-   // Sine is stored before the cosine call, matching the original order.
-   *__sinptr = __sin_out;
-   const float __cos_out = __ocml_native_cos_f32(__x);
-   *__cosptr = __cos_out;
- }
- // __sinf forwards to the native sine; __tanf forwards to the regular
- // __ocml_tan_f32.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __sinf(float __x) { return __ocml_native_sin_f32(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- __tanf(float __x) { return __ocml_tan_f32(__x); }
- // Double-precision wrappers (acos .. cyl_bessel_i1). Each forwards its
- // arguments, in order, to the __ocml_*_f64 routine named in its body.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- acos(double __x) { return __ocml_acos_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- acosh(double __x) { return __ocml_acosh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- asin(double __x) { return __ocml_asin_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- asinh(double __x) { return __ocml_asinh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- atan(double __x) { return __ocml_atan_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- atanh(double __x) { return __ocml_atanh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cbrt(double __x) { return __ocml_cbrt_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- ceil(double __x) { return __ocml_ceil_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- copysign(double __x, double __y) { return __ocml_copysign_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cos(double __x) { return __ocml_cos_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cosh(double __x) { return __ocml_cosh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cospi(double __x) { return __ocml_cospi_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
- // Double-precision wrappers (erf .. fmod). Each forwards to the routine
- // named in its body; fabs uses the compiler builtin.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- erf(double __x) { return __ocml_erf_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- erfc(double __x) { return __ocml_erfc_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- erfcx(double __x) { return __ocml_erfcx_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- erfinv(double __x) { return __ocml_erfinv_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- exp(double __x) { return __ocml_exp_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- exp10(double __x) { return __ocml_exp10_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- exp2(double __x) { return __ocml_exp2_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- expm1(double __x) { return __ocml_expm1_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fabs(double __x) { return __builtin_fabs(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- floor(double __x) { return __ocml_floor_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fma(double __x, double __y, double __z) { return __ocml_fma_f64(__x, __y, __z); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
- // Calls __ocml_frexp_f64 on __x, returns its double result and stores the
- // integer it produces through __nptr.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- frexp(double __x, int *__nptr) {
-   // The OCML routine takes an address_space(5) pointer, so the integer is
-   // captured in a local first and then copied to the caller's pointer.
-   int __exp_out;
-   const double __frac = __ocml_frexp_f64(
-       __x, (__attribute__((address_space(5))) int *)&__exp_out);
-   *__nptr = __exp_out;
-   return __frac;
- }
- // Double-precision wrappers (hypot .. j1). Each forwards to the OCML routine
- // named in its body; the classification helpers return bool.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) int
- ilogb(double __x) { return __ocml_ilogb_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __finite(double __x) { return __ocml_isfinite_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __isinf(double __x) { return __ocml_isinf_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __isnan(double __x) { return __ocml_isnan_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- j0(double __x) { return __ocml_j0_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- j1(double __x) { return __ocml_j1_f64(__x); }
- // Order-__n value built from j0/j1.
- //
- // Orders 0 and 1 come directly from j0/j1. Higher orders use the three-term
- // recurrence f_{k+1}(x) = (2k / x) * f_k(x) - f_{k-1}(x).
- // Negative orders are reflected with J_{-n}(x) = (-1)^n * J_n(x). Before this
- // change a negative __n skipped the loop and returned j1(__x) unchanged.
- // NOTE(review): forward recurrence is known to lose accuracy when __n is large
- // relative to |__x|; that limitation is unchanged here.
- static __attribute__((device)) inline __attribute__((always_inline))
- double jn(int __n, double __x) {
-   // Take |__n| in unsigned arithmetic so INT_MIN needs no special case.
-   const unsigned int __order =
-       (__n < 0) ? 0u - (unsigned int)__n : (unsigned int)__n;
-   // Only odd negative orders change sign.
-   const bool __flip = (__n < 0) && ((__order & 1u) != 0u);
-   double __r;
-   if (__order == 0u) {
-     __r = j0(__x);
-   } else if (__order == 1u) {
-     __r = j1(__x);
-   } else {
-     double __x0 = j0(__x);
-     double __x1 = j1(__x);
-     for (unsigned int __i = 1u; __i < __order; ++__i) {
-       double __x2 = (2 * __i) / __x * __x1 - __x0;
-       __x0 = __x1;
-       __x1 = __x2;
-     }
-     __r = __x1;
-   }
-   return __flip ? -__r : __r;
- }
- // Double-precision wrappers (ldexp .. lround). Each forwards to the OCML
- // routine named in its body.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- lgamma(double __x) { return __ocml_lgamma_f64(__x); }
- // llrint/llround: the double result of rint/round is converted implicitly
- // to the integer return type.
- static __attribute__((device)) inline __attribute__((always_inline)) long long int
- llrint(double __x) { return __ocml_rint_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long long int
- llround(double __x) { return __ocml_round_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- log(double __x) { return __ocml_log_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- log10(double __x) { return __ocml_log10_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- log1p(double __x) { return __ocml_log1p_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- log2(double __x) { return __ocml_log2_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- logb(double __x) { return __ocml_logb_f64(__x); }
- // lrint/lround: same implicit double-to-integer conversion, to long int.
- static __attribute__((device)) inline __attribute__((always_inline)) long int
- lrint(double __x) { return __ocml_rint_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long int
- lround(double __x) { return __ocml_round_f64(__x); }
- // Calls __ocml_modf_f64 on __x, returns its double result and stores the
- // second value it produces through __iptr.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- modf(double __x, double *__iptr) {
-   // The OCML routine writes through an address_space(5) pointer, so a local
-   // receives the value before it is copied out.
-   double __second;
-   const double __first = __ocml_modf_f64(
-       __x, (__attribute__((address_space(5))) double *)&__second);
-   *__iptr = __second;
-   return __first;
- }
- // Assembles a double field by field: sign 0, all exponent bits set, quiet bit
- // set, and the remaining bits taken from __make_mantissa(__tagp).
- static __attribute__((device)) inline __attribute__((always_inline)) double
- nan(const char *__tagp) {
-   // Bit-field view of a double; widths in declaration order are 51/1/11/1.
-   union {
-     double val;
-     struct ieee_double {
-       long unsigned int mantissa : 51;
-       unsigned int quiet : 1;
-       unsigned int exponent : 11;
-       unsigned int sign : 1;
-     } bits;
-   } __pun;
-   // The bit-field struct must overlay the double exactly.
-   static_assert((sizeof(__pun.bits)) == (sizeof(__pun.val)), "");
-   // The four fields are disjoint, so the order of the stores is irrelevant.
-   __pun.bits.mantissa = __make_mantissa(__tagp);  // kept to 51 bits
-   __pun.bits.quiet = 1u;
-   __pun.bits.exponent = ~0u;  // every exponent bit set
-   __pun.bits.sign = 0u;
-   return __pun.val;
- }
- // nearbyint and nextafter forward to the OCML routines named in their bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- nextafter(double __x, double __y) { return __ocml_nextafter_f64(__x, __y); }
- // Sums the squares of the first __dim doubles at __a and returns
- // __ocml_sqrt_f64 of that sum.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- norm(int __dim, const double *__a) {
-   double __sum_sq = 0;
-   // Count __dim down to zero while walking the pointer forward.
-   for (; __dim != 0; --__dim, ++__a) {
-     __sum_sq += __a[0] * __a[0];
-   }
-   return __ocml_sqrt_f64(__sum_sq);
- }
- // Double-precision wrappers (norm3d .. remainder). Each forwards to the OCML
- // routine named in its body; norm3d/norm4d map to len3/len4, normcdf and
- // normcdfinv map to ncdf/ncdfinv, and powi maps to pown.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- norm3d(double __x, double __y, double __z) { return __ocml_len3_f64(__x, __y, __z); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- norm4d(double __x, double __y, double __z, double __w) {
-   return __ocml_len4_f64(__x, __y, __z, __w);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- normcdf(double __x) { return __ocml_ncdf_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- remainder(double __x, double __y) { return __ocml_remainder_f64(__x, __y); }
- // Calls __ocml_remquo_f64 on (__x, __y), returns its double result and
- // stores the integer it produces through __quo.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- remquo(double __x, double __y, int *__quo) {
-   // The OCML routine writes through an address_space(5) pointer, so a local
-   // receives the integer before it is copied out.
-   int __quo_out;
-   const double __rem = __ocml_remquo_f64(
-       __x, __y, (__attribute__((address_space(5))) int *)&__quo_out);
-   *__quo = __quo_out;
-   return __rem;
- }
- // rhypot and rint forward to the OCML routines named in their bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rint(double __x) { return __ocml_rint_f64(__x); }
- // Sums the squares of the first __dim doubles at __a and returns
- // __ocml_rsqrt_f64 of that sum.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rnorm(int __dim, const double *__a) {
-   double __sum_sq = 0;
-   // Count __dim down to zero while walking the pointer forward.
-   for (; __dim != 0; --__dim, ++__a) {
-     __sum_sq += __a[0] * __a[0];
-   }
-   return __ocml_rsqrt_f64(__sum_sq);
- }
- // Double-precision wrappers (rnorm3d .. rsqrt). Each forwards to the OCML
- // routine named in its body; rnorm3d/rnorm4d map to rlen3/rlen4.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rnorm3d(double __x, double __y, double __z) { return __ocml_rlen3_f64(__x, __y, __z); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rnorm4d(double __x, double __y, double __z, double __w) {
-   return __ocml_rlen4_f64(__x, __y, __z, __w);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- round(double __x) { return __ocml_round_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
- // scalbn for a `long int` exponent.
- //
- // Exponents that fit in int use __ocml_scalbn_f64; anything outside that
- // range goes to __ocml_scalb_f64 instead. The previous guard compared __n
- // against 9223372036854775807L, so almost every out-of-int-range exponent was
- // narrowed on its way into the int-exponent call (e.g. 2^32 became 0).
- // NOTE(review): assumes __ocml_scalbn_f64 takes an int exponent, as scalbn's
- // call suggests; confirm against the libdevice declarations.
- static __attribute__((device)) inline __attribute__((always_inline))
- double scalbln(double __x, long int __n) {
-   const bool __fits_int = (__n >= -2147483647L - 1L) && (__n <= 2147483647L);
-   return __fits_int ? __ocml_scalbn_f64(__x, (int)__n)
-                     : __ocml_scalb_f64(__x, __n);
- }
- // scalbn, __signbit and sin forward to the OCML routines named in their
- // bodies.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool
- __signbit(double __x) { return __ocml_signbit_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- sin(double __x) { return __ocml_sin_f64(__x); }
- // One __ocml_sincos_f64 call fills both outputs: its return value goes to
- // *__sinptr and the value it writes through its pointer goes to *__cosptr.
- static __attribute__((device)) inline __attribute__((always_inline)) void
- sincos(double __x, double *__sinptr, double *__cosptr) {
-   // The OCML routine writes through an address_space(5) pointer, hence the
-   // local staging variable.
-   double __cos_out;
-   const double __sin_out = __ocml_sincos_f64(
-       __x, (__attribute__((address_space(5))) double *)&__cos_out);
-   // Store order kept as before: sine first, then cosine.
-   *__sinptr = __sin_out;
-   *__cosptr = __cos_out;
- }
- // One __ocml_sincospi_f64 call fills both outputs: its return value goes to
- // *__sinptr and the value it writes through its pointer goes to *__cosptr.
- static __attribute__((device)) inline __attribute__((always_inline)) void
- sincospi(double __x, double *__sinptr, double *__cosptr) {
-   // The OCML routine writes through an address_space(5) pointer, hence the
-   // local staging variable.
-   double __cos_out;
-   const double __sin_out = __ocml_sincospi_f64(
-       __x, (__attribute__((address_space(5))) double *)&__cos_out);
-   // Store order kept as before: sine first, then cosine.
-   *__sinptr = __sin_out;
-   *__cosptr = __cos_out;
- }
- // Double-precision wrappers (sinh .. y1). Each forwards its argument to the
- // OCML routine named in its body.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- sinh(double __x) { return __ocml_sinh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- sinpi(double __x) { return __ocml_sinpi_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- sqrt(double __x) { return __ocml_sqrt_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- tan(double __x) { return __ocml_tan_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- tanh(double __x) { return __ocml_tanh_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- tgamma(double __x) { return __ocml_tgamma_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- trunc(double __x) { return __ocml_trunc_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- y0(double __x) { return __ocml_y0_f64(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- y1(double __x) { return __ocml_y1_f64(__x); }
- // Order-__n value built from y0/y1.
- //
- // Orders 0 and 1 come directly from y0/y1. Higher orders use the three-term
- // recurrence f_{k+1}(x) = (2k / x) * f_k(x) - f_{k-1}(x).
- // Negative orders are reflected with Y_{-n}(x) = (-1)^n * Y_n(x). Before this
- // change a negative __n skipped the loop and returned y1(__x) unchanged.
- static __attribute__((device)) inline __attribute__((always_inline))
- double yn(int __n, double __x) {
-   // Take |__n| in unsigned arithmetic so INT_MIN needs no special case.
-   const unsigned int __order =
-       (__n < 0) ? 0u - (unsigned int)__n : (unsigned int)__n;
-   // Only odd negative orders change sign.
-   const bool __flip = (__n < 0) && ((__order & 1u) != 0u);
-   double __r;
-   if (__order == 0u) {
-     __r = y0(__x);
-   } else if (__order == 1u) {
-     __r = y1(__x);
-   } else {
-     double __x0 = y0(__x);
-     double __x1 = y1(__x);
-     for (unsigned int __i = 1u; __i < __order; ++__i) {
-       double __x2 = (2 * __i) / __x * __x1 - __x0;
-       __x0 = __x1;
-       __x1 = __x2;
-     }
-     __r = __x1;
-   }
-   return __flip ? -__r : __r;
- }
- # 1190 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- // Double-precision intrinsic-style helpers (__dadd_rn .. __fma_rn). The
- // arithmetic ones are plain C++ operators on double; __dsqrt_rn and __fma_rn
- // forward to OCML. Preprocessor line markers are kept in place.
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __dadd_rn(double __x, double __y) { return __x + __y; }
- # 1212 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __ddiv_rn(double __x, double __y) { return __x / __y; }
- # 1234 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __dmul_rn(double __x, double __y) { return __x * __y; }
- # 1248 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __drcp_rn(double __x) { return 1.0 / __x; }
- # 1262 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
- # 1284 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __dsub_rn(double __x, double __y) { return __x - __y; }
- # 1306 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) double
- __fma_rn(double __x, double __y, double __z) { return __ocml_fma_f64(__x, __y, __z); }
- # 1325 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_math.h" 3
- // Generic minimum: yields __arg1 when __arg1 < __arg2, otherwise __arg2.
- template <class T>
- static __attribute__((device)) inline __attribute__((always_inline)) T
- min(T __arg1, T __arg2) {
-   if (__arg1 < __arg2)
-     return __arg1;
-   return __arg2;
- }
- // Generic maximum: yields __arg1 when __arg1 > __arg2, otherwise __arg2.
- template <class T>
- static __attribute__((device)) inline __attribute__((always_inline)) T
- max(T __arg1, T __arg2) {
-   if (__arg1 > __arg2)
-     return __arg1;
-   return __arg2;
- }
- // Non-template int overloads with the same comparisons.
- static __attribute__((device)) inline __attribute__((always_inline)) int
- min(int __arg1, int __arg2) {
-   if (__arg1 < __arg2)
-     return __arg1;
-   return __arg2;
- }
- static __attribute__((device)) inline __attribute__((always_inline)) int
- max(int __arg1, int __arg2) {
-   if (__arg1 > __arg2)
-     return __arg1;
-   return __arg2;
- }
- // Floating-point min/max overloads: these defer to fmaxf/fmax/fminf/fmin
- // instead of comparing directly.
- static __attribute__((device)) inline __attribute__((always_inline)) float
- max(float __x, float __y) { return fmaxf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- max(double __x, double __y) { return fmax(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float
- min(float __x, float __y) { return fminf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) double
- min(double __x, double __y) { return fmin(__x, __y); }
- # 129 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 2 3
- # 1 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_stdlib.h" 1 3
- # 130 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 2 3
- # 1 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 1 3
- # 41 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- // abs overloads dispatch on argument type to ::fabs, ::fabsf, ::llabs and
- // ::labs; the float fma overload dispatches to ::fmaf.
- static __attribute__((device)) inline __attribute__((always_inline))
- double abs(double __x) { return ::fabs(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float abs(float __x) { return ::fabsf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- long long abs(long long __n) { return ::llabs(__n); }
- static __attribute__((device)) inline __attribute__((always_inline))
- long abs(long __n) { return ::labs(__n); }
- static __attribute__((device)) inline __attribute__((always_inline))
- float fma(float __x, float __y, float __z) { return ::fmaf(__x, __y, __z); }
- # 61 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- // float overload of frexp: dispatches to ::frexpf with the same arguments.
- static __attribute__((device)) inline __attribute__((always_inline))
- float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); }
- # 93 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- // Classification and comparison overloads for float and double.
- // isinf/isfinite/isnan dispatch to the global __isinf*/__finite*/__isnan*
- // helpers; the remaining predicates map onto the matching compiler builtin.
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isinf(float __x) { return ::__isinff(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isinf(double __x) { return ::__isinf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isfinite(float __x) { return ::__finitef(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isfinite(double __x) { return ::__finite(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isnan(float __x) { return ::__isnanf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isnan(double __x) { return ::__isnan(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isgreater(double __x, double __y) { return __builtin_isgreater(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isgreaterequal(float __x, float __y) { return __builtin_isgreaterequal(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isless(float __x, float __y) { return __builtin_isless(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isless(double __x, double __y) { return __builtin_isless(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool islessequal(float __x, float __y) { return __builtin_islessequal(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool islessequal(double __x, double __y) { return __builtin_islessequal(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool islessgreater(float __x, float __y) { return __builtin_islessgreater(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isnormal(float __x) { return __builtin_isnormal(__x); }
- static __attribute__((device)) inline __attribute__((always_inline))
- bool isnormal(double __x) { return __builtin_isnormal(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool isunordered(float __x, float __y) {
- return __builtin_isunordered(__x, __y);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) bool isunordered(double __x, double __y) {
- return __builtin_isunordered(__x, __y);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) float modf(float __x, float *__iptr) {
- return ::modff(__x, __iptr);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) float pow(float __base, int __iexp) {
- return ::powif(__base, __iexp);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) double pow(double __base, int __iexp) {
- return ::powi(__base, __iexp);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) float remquo(float __x, float __y, int *__quo) {
- return ::remquof(__x, __y, __quo);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) float scalbln(float __x, long int __n) {
- return ::scalblnf(__x, __n);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) bool signbit(float __x) { return ::__signbitf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) bool signbit(double __x) { return ::__signbit(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) _Float16 fma(_Float16 __x, _Float16 __y,
- _Float16 __z) {
- return __ocml_fma_f16(__x, __y, __z);
- }
- static __attribute__((device)) inline __attribute__((always_inline)) _Float16 pow(_Float16 __base, int __iexp) {
- return __ocml_pown_f16(__base, __iexp);
- }
- # 202 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- static __attribute__((device)) inline __attribute__((always_inline)) float acos(float __x) { return acosf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float acosh(float __x) { return acoshf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float asin(float __x) { return asinf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float asinh(float __x) { return asinhf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float atan(float __x) { return atanf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float atan2(float __x, float __y) { return atan2f(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float atanh(float __x) { return atanhf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float cbrt(float __x) { return cbrtf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float ceil(float __x) { return ceilf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float copysign(float __x, float __y) { return copysignf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float cos(float __x) { return cosf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float cosh(float __x) { return coshf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float erf(float __x) { return erff(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float erfc(float __x) { return erfcf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float exp(float __x) { return expf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float exp2(float __x) { return exp2f(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float expm1(float __x) { return expm1f(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float fabs(float __x) { return fabsf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float fdim(float __x, float __y) { return fdimf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float floor(float __x) { return floorf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float fmax(float __x, float __y) { return fmaxf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float fmin(float __x, float __y) { return fminf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float fmod(float __x, float __y) { return fmodf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float hypot(float __x, float __y) { return hypotf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) int ilogb(float __x) { return ilogbf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float ldexp(float __x, int __y) { return ldexpf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float lgamma(float __x) { return lgammaf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float log(float __x) { return logf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float log10(float __x) { return log10f(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float log1p(float __x) { return log1pf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float log2(float __x) { return log2f(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float logb(float __x) { return logbf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long long llrint(float __x) { return llrintf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long long llround(float __x) { return llroundf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long lrint(float __x) { return lrintf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) long lround(float __x) { return lroundf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float nearbyint(float __x) { return nearbyintf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float nextafter(float __x, float __y) { return nextafterf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float pow(float __x, float __y) { return powf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float remainder(float __x, float __y) { return remainderf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float rint(float __x) { return rintf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float round(float __x) { return roundf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float scalbn(float __x, int __y) { return scalbnf(__x, __y); }
- static __attribute__((device)) inline __attribute__((always_inline)) float sin(float __x) { return sinf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float sinh(float __x) { return sinhf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float sqrt(float __x) { return sqrtf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float tan(float __x) { return tanf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float tanh(float __x) { return tanhf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float tgamma(float __x) { return tgammaf(__x); }
- static __attribute__((device)) inline __attribute__((always_inline)) float trunc(float __x) { return truncf(__x); }
- # 265 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- template <bool __B, class __T = void> struct __hip_enable_if {};
- template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; };
- namespace __hip {
- template <class _Tp> struct is_integral {
- enum { value = 0 };
- };
- template <> struct is_integral<bool> {
- enum { value = 1 };
- };
- template <> struct is_integral<char> {
- enum { value = 1 };
- };
- template <> struct is_integral<signed char> {
- enum { value = 1 };
- };
- template <> struct is_integral<unsigned char> {
- enum { value = 1 };
- };
- template <> struct is_integral<wchar_t> {
- enum { value = 1 };
- };
- template <> struct is_integral<short> {
- enum { value = 1 };
- };
- template <> struct is_integral<unsigned short> {
- enum { value = 1 };
- };
- template <> struct is_integral<int> {
- enum { value = 1 };
- };
- template <> struct is_integral<unsigned int> {
- enum { value = 1 };
- };
- template <> struct is_integral<long> {
- enum { value = 1 };
- };
- template <> struct is_integral<unsigned long> {
- enum { value = 1 };
- };
- template <> struct is_integral<long long> {
- enum { value = 1 };
- };
- template <> struct is_integral<unsigned long long> {
- enum { value = 1 };
- };
- template <class _Tp> struct is_arithmetic {
- enum { value = 0 };
- };
- template <> struct is_arithmetic<bool> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<char> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<signed char> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<unsigned char> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<wchar_t> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<short> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<unsigned short> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<int> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<unsigned int> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<long> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<unsigned long> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<long long> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<unsigned long long> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<float> {
- enum { value = 1 };
- };
- template <> struct is_arithmetic<double> {
- enum { value = 1 };
- };
- struct true_type {
- static const __attribute__((constant)) bool value = true;
- };
- struct false_type {
- static const __attribute__((constant)) bool value = false;
- };
- template <typename __T, typename __U> struct is_same : public false_type {};
- template <typename __T> struct is_same<__T, __T> : public true_type {};
- template <typename __T> struct add_rvalue_reference { typedef __T &&type; };
- template <typename __T> typename add_rvalue_reference<__T>::type declval();
- template <class _Tp> struct __numeric_type {
- static void __test(...);
- static _Float16 __test(_Float16);
- static float __test(float);
- static double __test(char);
- static double __test(int);
- static double __test(unsigned);
- static double __test(long);
- static double __test(unsigned long);
- static double __test(long long);
- static double __test(unsigned long long);
- static double __test(double);
- static double __test(long double);
- typedef decltype(__test(declval<_Tp>())) type;
- static const bool value = !is_same<type, void>::value;
- };
- template <> struct __numeric_type<void> { static const bool value = true; };
- template <class _A1, class _A2 = void, class _A3 = void,
- bool = __numeric_type<_A1>::value &&__numeric_type<_A2>::value
- &&__numeric_type<_A3>::value>
- class __promote_imp {
- public:
- static const bool value = false;
- };
- template <class _A1, class _A2, class _A3>
- class __promote_imp<_A1, _A2, _A3, true> {
- private:
- typedef typename __promote_imp<_A1>::type __type1;
- typedef typename __promote_imp<_A2>::type __type2;
- typedef typename __promote_imp<_A3>::type __type3;
- public:
- typedef decltype(__type1() + __type2() + __type3()) type;
- static const bool value = true;
- };
- template <class _A1, class _A2> class __promote_imp<_A1, _A2, void, true> {
- private:
- typedef typename __promote_imp<_A1>::type __type1;
- typedef typename __promote_imp<_A2>::type __type2;
- public:
- typedef decltype(__type1() + __type2()) type;
- static const bool value = true;
- };
- template <class _A1> class __promote_imp<_A1, void, void, true> {
- public:
- typedef typename __numeric_type<_A1>::type type;
- static const bool value = true;
- };
- template <class _A1, class _A2 = void, class _A3 = void>
- class __promote : public __promote_imp<_A1, _A2, _A3> {};
- }
- # 478 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type acos(__T __x) { return ::acos((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type acosh(__T __x) { return ::acosh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type asin(__T __x) { return ::asin((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type asinh(__T __x) { return ::asinh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type atan(__T __x) { return ::atan((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type atan2(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return atan2((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type atanh(__T __x) { return ::atanh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type cbrt(__T __x) { return ::cbrt((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type ceil(__T __x) { return ::ceil((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type copysign(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return copysign((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type cos(__T __x) { return ::cos((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type cosh(__T __x) { return ::cosh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type erf(__T __x) { return ::erf((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type erfc(__T __x) { return ::erfc((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type exp(__T __x) { return ::exp((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type exp2(__T __x) { return ::exp2((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type expm1(__T __x) { return ::expm1((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type fabs(__T __x) { return ::fabs((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type fdim(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return fdim((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type floor(__T __x) { return ::floor((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type fmax(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return fmax((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type fmin(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return fmin((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type fmod(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return fmod((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type hypot(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return hypot((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, int>::type ilogb(__T __x) { return ::ilogb((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, bool>::type isfinite(__T __x) { return ::isfinite((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type isgreater(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return isgreater((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type isgreaterequal(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return isgreaterequal((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, bool>::type isinf(__T __x) { return ::isinf((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type isless(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return isless((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type islessequal(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return islessequal((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type islessgreater(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return islessgreater((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, bool>::type isnan(__T __x) { return ::isnan((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, bool>::type isnormal(__T __x) { return ::isnormal((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type isunordered(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return isunordered((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type lgamma(__T __x) { return ::lgamma((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type log(__T __x) { return ::log((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type log10(__T __x) { return ::log10((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type log1p(__T __x) { return ::log1p((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type log2(__T __x) { return ::log2((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type logb(__T __x) { return ::logb((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, long long>::type llrint(__T __x) { return ::llrint((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, long long>::type llround(__T __x) { return ::llround((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, long>::type lrint(__T __x) { return ::lrint((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, long>::type lround(__T __x) { return ::lround((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type nearbyint(__T __x) { return ::nearbyint((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type nextafter(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return nextafter((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type pow(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return pow((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type remainder(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return remainder((__result_type)__x, (__result_type)__y); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type rint(__T __x) { return ::rint((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type round(__T __x) { return ::round((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, bool>::type signbit(__T __x) { return ::signbit((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type sin(__T __x) { return ::sin((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type sinh(__T __x) { return ::sinh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type sqrt(__T __x) { return ::sqrt((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type tan(__T __x) { return ::tan((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type tanh(__T __x) { return ::tanh((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type tgamma(__T __x) { return ::tgamma((double)__x); }
- template <typename __T> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type trunc(__T __x) { return ::trunc((double)__x); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type max(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return max((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2> static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if< __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, typename __hip::__promote<__T1, __T2>::type>::type min(__T1 __x, __T2 __y) { typedef typename __hip::__promote<__T1, __T2>::type __result_type; return min((__result_type)__x, (__result_type)__y); }
- template <typename __T1, typename __T2, typename __T3>
- static __attribute__((device)) inline __attribute__((always_inline)) typename __hip_enable_if<
- __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value &&
- __hip::is_arithmetic<__T3>::value,
- typename __hip::__promote<__T1, __T2, __T3>::type>::type
- fma(__T1 __x, __T2 __y, __T3 __z) {
- typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type;
- return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z);
- }
- # 568 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- // frexp overload for integral arguments: __x is converted to double and the
- // exponent pointer is passed through unchanged to the global ::frexp.
- template <typename __T>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
- frexp(__T __x, int *__exp)
- {
-     const double __as_double = static_cast<double>(__x);
-     return ::frexp(__as_double, __exp);
- }
- // ldexp overload for integral arguments: __x is converted to double and the
- // exponent is passed through unchanged to the global ::ldexp.
- template <typename __T>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
- ldexp(__T __x, int __exp)
- {
-     const double __as_double = static_cast<double>(__x);
-     return ::ldexp(__as_double, __exp);
- }
- // modf overload for integral arguments: __x is converted to double and the
- // output pointer is passed through unchanged to the global ::modf.
- template <typename __T>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
- modf(__T __x, double *__exp)
- {
-     const double __as_double = static_cast<double>(__x);
-     return ::modf(__as_double, __exp);
- }
- // Mixed-type remquo: both arithmetic operands are converted to the type
- // chosen by __hip::__promote; __quo is passed through to the global ::remquo.
- template <typename __T1, typename __T2>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<
-     __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value,
-     typename __hip::__promote<__T1, __T2>::type>::type
- remquo(__T1 __x, __T2 __y, int *__quo)
- {
-     using __result_type = typename __hip::__promote<__T1, __T2>::type;
-     return ::remquo(static_cast<__result_type>(__x),
-                     static_cast<__result_type>(__y),
-                     __quo);
- }
- # 610 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_cmath.h" 3
- // scalbln overload for integral arguments: __x is converted to double and the
- // long exponent is passed through unchanged to the global ::scalbln.
- template <typename __T>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
- scalbln(__T __x, long int __exp)
- {
-     const double __as_double = static_cast<double>(__x);
-     return ::scalbln(__as_double, __exp);
- }
- // scalbn overload for integral arguments: __x is converted to double and the
- // int exponent is passed through unchanged to the global ::scalbn.
- template <typename __T>
- static __attribute__((device)) inline __attribute__((always_inline))
- typename __hip_enable_if<__hip::is_integral<__T>::value, double>::type
- scalbn(__T __x, int __exp)
- {
-     const double __as_double = static_cast<double>(__x);
-     return ::scalbn(__as_double, __exp);
- }
- # 133 "/opt/rocm-6.0.0/lib/llvm/lib/clang/17.0.0/include/__clang_hip_runtime_wrapper.h" 2 3
- # 2 "<built-in>" 2
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/out/ubuntu-22.04/22.04/build/hip-on-rocclr/hipamd/src/hiprtc/hip_rtc_gen/hipRTC_header.h" 2
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 1 3
- # 58 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/out/ubuntu-22.04/22.04/build/hip-on-rocclr/hipamd/include/hip/hip_version.h" 1 3
- # 59 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 2 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_common.h" 1 3
- # 27 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_common.h" 3
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wreserved-macro-identifier"
- # 97 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_common.h" 3
- #pragma clang diagnostic pop
- # 60 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 2 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 1 3
- # 32 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_common.h" 1 3
- # 33 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 2 3
- # 43 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- // C-linkage declarations of three amd_dbgapi_* query functions. Only the
- // prototypes appear here; judging by the names they presumably report build
- // metadata (confirm against the implementation).
- # 54 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- extern "C" const char* amd_dbgapi_get_build_name();
- # 63 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- extern "C" const char* amd_dbgapi_get_git_hash();
- # 72 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- extern "C" size_t amd_dbgapi_get_build_id();
- # 92 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- // Global integer aliases carrying the <cstdint> names, followed by
- // using-declarations that make the same names reachable as std::...
- using uint32_t = unsigned int;
- using uint64_t = unsigned long long;
- using int32_t = signed int;
- using int64_t = signed long long;
- namespace std {
-     using ::int32_t;
-     using ::int64_t;
-     using ::uint32_t;
-     using ::uint64_t;
- }
- # 124 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/hip_ldg.h" 1 3
- # 27 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/hip_ldg.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 1 3
- # 31 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/host_defines.h" 1 3
- # 38 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/host_defines.h" 3
- // Private stand-ins for the <cstdint> integer names and a subset of
- // <type_traits>, so the HIP headers do not need the standard library.
- namespace __hip_internal {
- // Integer aliases.
- using uint8_t = unsigned char;
- using uint16_t = unsigned short;
- using uint32_t = unsigned int;
- using uint64_t = unsigned long long;
- using int8_t = signed char;
- using int16_t = signed short;
- using int32_t = signed int;
- using int64_t = signed long long;
- // Wraps the compile-time constant __v of type _Tp as a type.
- template <class _Tp, _Tp __v>
- struct integral_constant {
-     typedef _Tp value_type;
-     typedef integral_constant type;
-     static constexpr const _Tp value = __v;
-     constexpr operator value_type() const { return value; }
-     constexpr value_type operator()() const { return value; }
- };
- // Out-of-class definition of the static data member declared above.
- template <class _Tp, _Tp __v>
- constexpr const _Tp integral_constant<_Tp, __v>::value;
- template <bool B> using bool_constant = integral_constant<bool, B>;
- using true_type = bool_constant<true>;
- using false_type = bool_constant<false>;
- // enable_if: the nested ::type exists only for a true condition.
- template <bool __B, class __T = void> struct enable_if {};
- template <class __T> struct enable_if<true, __T> { using type = __T; };
- // Selects true_type or false_type as a base class from a bool.
- template <bool _B> struct true_or_false_type : false_type {};
- template <> struct true_or_false_type<true> : true_type {};
- // is_integral: true only for the types listed explicitly below.
- template <class _Tp> struct is_integral : false_type {};
- template <> struct is_integral<bool> : true_type {};
- template <> struct is_integral<char> : true_type {};
- template <> struct is_integral<signed char> : true_type {};
- template <> struct is_integral<unsigned char> : true_type {};
- template <> struct is_integral<wchar_t> : true_type {};
- template <> struct is_integral<short> : true_type {};
- template <> struct is_integral<unsigned short> : true_type {};
- template <> struct is_integral<int> : true_type {};
- template <> struct is_integral<unsigned int> : true_type {};
- template <> struct is_integral<long> : true_type {};
- template <> struct is_integral<unsigned long> : true_type {};
- template <> struct is_integral<long long> : true_type {};
- template <> struct is_integral<unsigned long long> : true_type {};
- // is_arithmetic: the integral list plus float and double.
- // NOTE(review): long double is listed under is_floating_point below but not
- // here; confirm whether that asymmetry is intentional.
- template <class _Tp> struct is_arithmetic : false_type {};
- template <> struct is_arithmetic<bool> : true_type {};
- template <> struct is_arithmetic<char> : true_type {};
- template <> struct is_arithmetic<signed char> : true_type {};
- template <> struct is_arithmetic<unsigned char> : true_type {};
- template <> struct is_arithmetic<wchar_t> : true_type {};
- template <> struct is_arithmetic<short> : true_type {};
- template <> struct is_arithmetic<unsigned short> : true_type {};
- template <> struct is_arithmetic<int> : true_type {};
- template <> struct is_arithmetic<unsigned int> : true_type {};
- template <> struct is_arithmetic<long> : true_type {};
- template <> struct is_arithmetic<unsigned long> : true_type {};
- template <> struct is_arithmetic<long long> : true_type {};
- template <> struct is_arithmetic<unsigned long long> : true_type {};
- template <> struct is_arithmetic<float> : true_type {};
- template <> struct is_arithmetic<double> : true_type {};
- // is_floating_point
- template <typename _Tp> struct is_floating_point : false_type {};
- template <> struct is_floating_point<float> : true_type {};
- template <> struct is_floating_point<double> : true_type {};
- template <> struct is_floating_point<long double> : true_type {};
- // is_same
- template <typename __T, typename __U> struct is_same : false_type {};
- template <typename __T> struct is_same<__T, __T> : true_type {};
- // is_signed: false for non-arithmetic types; otherwise tests _Tp(-1) < _Tp(0).
- template <typename _Tp, bool = is_arithmetic<_Tp>::value>
- struct is_signed : false_type {};
- template <typename _Tp>
- struct is_signed<_Tp, true> : true_or_false_type<(_Tp(-1) < _Tp(0))> {};
- // Forward declarations only; no stream classes are defined in this namespace block.
- template <typename _CharT> struct char_traits;
- template <typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
- template <typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
- using istream = basic_istream<char>;
- using ostream = basic_ostream<char>;
- // Layout/triviality traits backed by compiler intrinsics.
- template <typename _Tp>
- struct is_standard_layout : integral_constant<bool, __is_standard_layout(_Tp)> {};
- template <typename _Tp>
- struct is_trivial : integral_constant<bool, __is_trivial(_Tp)> {};
- }
- // Global __hip_-prefixed names for the integer aliases in __hip_internal.
- using __hip_uint8_t = __hip_internal::uint8_t;
- using __hip_uint16_t = __hip_internal::uint16_t;
- using __hip_uint32_t = __hip_internal::uint32_t;
- using __hip_uint64_t = __hip_internal::uint64_t;
- using __hip_int8_t = __hip_internal::int8_t;
- using __hip_int16_t = __hip_internal::int16_t;
- using __hip_int32_t = __hip_internal::int32_t;
- using __hip_int64_t = __hip_internal::int64_t;
- # 32 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 2 3
- # 52 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 3
- // Hand-written subset of <type_traits> placed directly in namespace std; the
- // vector types defined after this block refer to these as std::... .
- namespace std {
- using ::size_t;
- // Wraps the compile-time constant __v of type _Tp as a type.
- template <class _Tp, _Tp __v>
- struct integral_constant {
-     typedef _Tp value_type;
-     typedef integral_constant type;
-     static constexpr const _Tp value = __v;
-     constexpr operator value_type() const { return value; }
-     constexpr value_type operator()() const { return value; }
- };
- // Out-of-class definition of the static data member declared above.
- template <class _Tp, _Tp __v>
- constexpr const _Tp integral_constant<_Tp, __v>::value;
- template <bool B> using bool_constant = integral_constant<bool, B>;
- using true_type = bool_constant<true>;
- using false_type = bool_constant<false>;
- // enable_if: the nested ::type exists only for a true condition.
- template <bool __B, class __T = void> struct enable_if {};
- template <class __T> struct enable_if<true, __T> { using type = __T; };
- // Selects true_type or false_type as a base class from a bool.
- template <bool _B> struct true_or_false_type : false_type {};
- template <> struct true_or_false_type<true> : true_type {};
- // is_integral: true only for the types listed explicitly below.
- template <class _Tp> struct is_integral : false_type {};
- template <> struct is_integral<bool> : true_type {};
- template <> struct is_integral<char> : true_type {};
- template <> struct is_integral<signed char> : true_type {};
- template <> struct is_integral<unsigned char> : true_type {};
- template <> struct is_integral<wchar_t> : true_type {};
- template <> struct is_integral<short> : true_type {};
- template <> struct is_integral<unsigned short> : true_type {};
- template <> struct is_integral<int> : true_type {};
- template <> struct is_integral<unsigned int> : true_type {};
- template <> struct is_integral<long> : true_type {};
- template <> struct is_integral<unsigned long> : true_type {};
- template <> struct is_integral<long long> : true_type {};
- template <> struct is_integral<unsigned long long> : true_type {};
- // is_arithmetic: the integral list plus float and double.
- // NOTE(review): long double is listed under is_floating_point below but not
- // here; confirm whether that asymmetry is intentional.
- template <class _Tp> struct is_arithmetic : false_type {};
- template <> struct is_arithmetic<bool> : true_type {};
- template <> struct is_arithmetic<char> : true_type {};
- template <> struct is_arithmetic<signed char> : true_type {};
- template <> struct is_arithmetic<unsigned char> : true_type {};
- template <> struct is_arithmetic<wchar_t> : true_type {};
- template <> struct is_arithmetic<short> : true_type {};
- template <> struct is_arithmetic<unsigned short> : true_type {};
- template <> struct is_arithmetic<int> : true_type {};
- template <> struct is_arithmetic<unsigned int> : true_type {};
- template <> struct is_arithmetic<long> : true_type {};
- template <> struct is_arithmetic<unsigned long> : true_type {};
- template <> struct is_arithmetic<long long> : true_type {};
- template <> struct is_arithmetic<unsigned long long> : true_type {};
- template <> struct is_arithmetic<float> : true_type {};
- template <> struct is_arithmetic<double> : true_type {};
- // is_floating_point
- template <typename _Tp> struct is_floating_point : false_type {};
- template <> struct is_floating_point<float> : true_type {};
- template <> struct is_floating_point<double> : true_type {};
- template <> struct is_floating_point<long double> : true_type {};
- // is_same
- template <typename __T, typename __U> struct is_same : false_type {};
- template <typename __T> struct is_same<__T, __T> : true_type {};
- // is_signed: false for non-arithmetic types; otherwise tests _Tp(-1) < _Tp(0).
- template <typename _Tp, bool = is_arithmetic<_Tp>::value>
- struct is_signed : false_type {};
- template <typename _Tp>
- struct is_signed<_Tp, true> : true_or_false_type<(_Tp(-1) < _Tp(0))> {};
- // is_convertible, answered by the __is_convertible_to compiler intrinsic.
- template <class _T1, class _T2>
- struct is_convertible : true_or_false_type<__is_convertible_to(_T1, _T2)> {};
- // Forward declarations only; no stream classes are defined in this namespace block.
- template <typename _CharT> struct char_traits;
- template <typename _CharT, typename _Traits = char_traits<_CharT>> class basic_istream;
- template <typename _CharT, typename _Traits = char_traits<_CharT>> class basic_ostream;
- using istream = basic_istream<char>;
- using ostream = basic_ostream<char>;
- // is_scalar, answered by the __is_scalar compiler intrinsic.
- template <typename __T>
- struct is_scalar : integral_constant<bool, __is_scalar(__T)> {};
- }
- namespace hip_impl {
- // Returns the smallest power of two that is >= x.
- //
- // x == 0 and x == 1 both yield 1. They are handled explicitly because the
- // general formula would call __builtin_clz(0) (undefined result) for x == 1
- // and shift by 32 bits (undefined behaviour) for x == 0.
- //
- // Precondition: x <= 0x80000000u. Larger inputs have no representable answer
- // in a 32-bit unsigned int and would still shift by 32 bits.
- // Kept as a single return expression so it remains usable as a C++11
- // constexpr function.
- inline constexpr unsigned int next_pot(unsigned int x) {
-     return (x <= 1u) ? 1u : (1u << (32u - __builtin_clz(x - 1u)));
- }
- }
- // Storage base for the HIP vector types. The primary template is only
- // declared; each supported element count has its own specialization.
- template<typename T, unsigned int n> struct HIP_vector_base;
- // One-element storage: a union overlaying the clang ext_vector_type(1)
- // value `data` with the named component `x`.
- template<typename T>
- struct HIP_vector_base<T, 1> {
-     using value_type = T;
-     using Native_vec_ = T __attribute__((ext_vector_type(1)));
-     union {
-         Native_vec_ data;
-         struct {
-             T x;
-         };
-     };
-     __attribute__((device)) HIP_vector_base() = default;
-     // Initializes the single element from x_.
-     __attribute__((device)) explicit constexpr HIP_vector_base(T x_) noexcept : data{x_} {}
-     __attribute__((device)) constexpr HIP_vector_base(const HIP_vector_base&) = default;
-     __attribute__((device)) constexpr HIP_vector_base(HIP_vector_base&&) = default;
-     __attribute__((device)) ~HIP_vector_base() = default;
-     __attribute__((device)) HIP_vector_base& operator=(const HIP_vector_base&) = default;
- };
- // Two-element storage: a union overlaying the clang ext_vector_type(2)
- // value `data` with the named components `x` and `y`.
- template<typename T>
- struct HIP_vector_base<T, 2> {
-     using value_type = T;
-     using Native_vec_ = T __attribute__((ext_vector_type(2)));
-     union {
-         Native_vec_ data;
-         struct {
-             T x;
-             T y;
-         };
-     };
-     __attribute__((device)) HIP_vector_base() = default;
-     // Single-value form: x_ is written to both elements.
-     __attribute__((device)) explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_} {}
-     // Element-by-element form.
-     __attribute__((device)) constexpr HIP_vector_base(T x_, T y_) noexcept : data{x_, y_} {}
-     __attribute__((device)) constexpr HIP_vector_base(const HIP_vector_base&) = default;
-     __attribute__((device)) constexpr HIP_vector_base(HIP_vector_base&&) = default;
-     __attribute__((device)) ~HIP_vector_base() = default;
-     __attribute__((device)) HIP_vector_base& operator=(const HIP_vector_base&) = default;
- };
- // Three-element storage. Unlike the other specializations, Native_vec_ here is
- // a hand-written struct around T d[3] that supplies indexing, element-wise
- // compound assignment, unary operators and an element-wise == itself.
- template<typename T>
- struct HIP_vector_base<T, 3> {
-     struct Native_vec_ {
-         T d[3];
-         __attribute__((device)) Native_vec_() = default;
-         // Single-value form: x_ is written to all three elements.
-         __attribute__((device)) explicit constexpr Native_vec_(T x_) noexcept : d{x_, x_, x_} {}
-         __attribute__((device)) constexpr Native_vec_(T x_, T y_, T z_) noexcept : d{x_, y_, z_} {}
-         __attribute__((device)) constexpr Native_vec_(const Native_vec_&) = default;
-         __attribute__((device)) constexpr Native_vec_(Native_vec_&&) = default;
-         __attribute__((device)) ~Native_vec_() = default;
-         __attribute__((device)) Native_vec_& operator=(const Native_vec_&) = default;
-         __attribute__((device)) Native_vec_& operator=(Native_vec_&&) = default;
-         // Unchecked element access.
-         __attribute__((device)) T& operator[](unsigned int idx) noexcept { return d[idx]; }
-         __attribute__((device)) T operator[](unsigned int idx) const noexcept { return d[idx]; }
-         // Element-wise compound arithmetic, elements visited in index order.
-         __attribute__((device)) Native_vec_& operator+=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] += rhs.d[k];
-             return *this;
-         }
-         __attribute__((device)) Native_vec_& operator-=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] -= rhs.d[k];
-             return *this;
-         }
-         __attribute__((device)) Native_vec_& operator*=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] *= rhs.d[k];
-             return *this;
-         }
-         __attribute__((device)) Native_vec_& operator/=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] /= rhs.d[k];
-             return *this;
-         }
-         // Unary minus; available only when std::is_signed<T> is true.
-         template<typename U = T, typename std::enable_if<std::is_signed<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_ operator-() const noexcept {
-             Native_vec_ negated(*this);
-             for (unsigned int k = 0u; k < 3u; ++k) negated.d[k] = -negated.d[k];
-             return negated;
-         }
-         // The remaining operators are available only when std::is_integral<T> is true.
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_ operator~() const noexcept {
-             Native_vec_ flipped(*this);
-             for (unsigned int k = 0u; k < 3u; ++k) flipped.d[k] = ~flipped.d[k];
-             return flipped;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator%=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] %= rhs.d[k];
-             return *this;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator^=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] ^= rhs.d[k];
-             return *this;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator|=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] |= rhs.d[k];
-             return *this;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator&=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] &= rhs.d[k];
-             return *this;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator>>=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] >>= rhs.d[k];
-             return *this;
-         }
-         template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-         __attribute__((device)) Native_vec_& operator<<=(const Native_vec_& rhs) noexcept {
-             for (unsigned int k = 0u; k < 3u; ++k) d[k] <<= rhs.d[k];
-             return *this;
-         }
-         // Element-wise equality. The result type has room for four ints; only the
-         // first three are given values here, one per element comparison.
-         using Vec3_cmp = int __attribute__((vector_size(4 * sizeof(int))));
-         __attribute__((device)) Vec3_cmp operator==(const Native_vec_& rhs) const noexcept {
-             return Vec3_cmp{d[0] == rhs.d[0], d[1] == rhs.d[1], d[2] == rhs.d[2]};
-         }
-     };
-     using value_type = T;
-     // `data` overlaid with the named components x, y and z.
-     union {
-         Native_vec_ data;
-         struct {
-             T x;
-             T y;
-             T z;
-         };
-     };
-     __attribute__((device)) HIP_vector_base() = default;
-     // Single-value form: x_ is written to all three elements.
-     __attribute__((device)) explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_, x_} {}
-     // Element-by-element form.
-     __attribute__((device)) constexpr HIP_vector_base(T x_, T y_, T z_) noexcept : data{x_, y_, z_} {}
-     __attribute__((device)) constexpr HIP_vector_base(const HIP_vector_base&) = default;
-     __attribute__((device)) constexpr HIP_vector_base(HIP_vector_base&&) = default;
-     __attribute__((device)) ~HIP_vector_base() = default;
-     __attribute__((device)) HIP_vector_base& operator=(const HIP_vector_base&) = default;
-     __attribute__((device)) HIP_vector_base& operator=(HIP_vector_base&&) = default;
- };
- // Four-element storage: a union overlaying the clang ext_vector_type(4)
- // value `data` with the named components `x`, `y`, `z` and `w`.
- template<typename T>
- struct HIP_vector_base<T, 4> {
-     using value_type = T;
-     using Native_vec_ = T __attribute__((ext_vector_type(4)));
-     union {
-         Native_vec_ data;
-         struct {
-             T x;
-             T y;
-             T z;
-             T w;
-         };
-     };
-     __attribute__((device)) HIP_vector_base() = default;
-     // Single-value form: x_ is written to all four elements.
-     __attribute__((device)) explicit constexpr HIP_vector_base(T x_) noexcept : data{x_, x_, x_, x_} {}
-     // Element-by-element form.
-     __attribute__((device)) constexpr HIP_vector_base(T x_, T y_, T z_, T w_) noexcept
-         : data{x_, y_, z_, w_} {}
-     __attribute__((device)) constexpr HIP_vector_base(const HIP_vector_base&) = default;
-     __attribute__((device)) constexpr HIP_vector_base(HIP_vector_base&&) = default;
-     __attribute__((device)) ~HIP_vector_base() = default;
-     __attribute__((device)) HIP_vector_base& operator=(const HIP_vector_base&) = default;
- };
- // Vector of `rank` elements of type T, layered over HIP_vector_base<T, rank>.
- // Adds converting constructors, ++/--, compound assignment and unary
- // operators; every operator works on the inherited `data` member.
- template<typename T, unsigned int rank>
- struct HIP_vector_type : public HIP_vector_base<T, rank> {
-     using HIP_vector_base<T, rank>::data;
-     using typename HIP_vector_base<T, rank>::Native_vec_;
-     __attribute__((device)) HIP_vector_type() = default;
-     // From one value convertible to T; forwarded (after a cast to T) to the
-     // base class single-value constructor.
-     template<typename U, typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
-     __attribute__((device)) explicit constexpr HIP_vector_type(U x_) noexcept
-         : HIP_vector_base<T, rank>{static_cast<T>(x_)}
-     {}
-     // From exactly `rank` values (rank > 1 only); each is cast to T.
-     template<
-         typename... Us,
-         typename std::enable_if<(rank > 1) && sizeof...(Us) == rank>::type* = nullptr>
-     __attribute__((device)) constexpr HIP_vector_type(Us... xs) noexcept
-         : HIP_vector_base<T, rank>{static_cast<T>(xs)...}
-     {}
-     __attribute__((device)) constexpr HIP_vector_type(const HIP_vector_type&) = default;
-     __attribute__((device)) constexpr HIP_vector_type(HIP_vector_type&&) = default;
-     __attribute__((device)) ~HIP_vector_type() = default;
-     __attribute__((device)) HIP_vector_type& operator=(const HIP_vector_type&) = default;
-     __attribute__((device)) HIP_vector_type& operator=(HIP_vector_type&&) = default;
-     // Pre-increment: adds a vector built from the value 1.
-     __attribute__((device)) HIP_vector_type& operator++() noexcept {
-         return *this += HIP_vector_type{1};
-     }
-     // Post-increment: returns the value held before the increment.
-     __attribute__((device)) HIP_vector_type operator++(int) noexcept {
-         HIP_vector_type previous(*this);
-         ++*this;
-         return previous;
-     }
-     // Pre-decrement: subtracts a vector built from the value 1.
-     __attribute__((device)) HIP_vector_type& operator--() noexcept {
-         return *this -= HIP_vector_type{1};
-     }
-     // Post-decrement: returns the value held before the decrement.
-     __attribute__((device)) HIP_vector_type operator--(int) noexcept {
-         HIP_vector_type previous(*this);
-         --*this;
-         return previous;
-     }
-     // += with another vector, and with a value convertible to T (which is
-     // first turned into a vector).
-     __attribute__((device)) HIP_vector_type& operator+=(const HIP_vector_type& rhs) noexcept {
-         data += rhs.data;
-         return *this;
-     }
-     template<typename U, typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator+=(U x) noexcept {
-         return *this += HIP_vector_type{x};
-     }
-     // -= in the same two forms.
-     __attribute__((device)) HIP_vector_type& operator-=(const HIP_vector_type& rhs) noexcept {
-         data -= rhs.data;
-         return *this;
-     }
-     template<typename U, typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator-=(U x) noexcept {
-         return *this -= HIP_vector_type{x};
-     }
-     // *= in the same two forms, plus the vector * vector friend.
-     __attribute__((device)) HIP_vector_type& operator*=(const HIP_vector_type& rhs) noexcept {
-         data *= rhs.data;
-         return *this;
-     }
-     friend __attribute__((device)) inline constexpr HIP_vector_type operator*(
-         HIP_vector_type x, const HIP_vector_type& y) noexcept
-     {
-         return HIP_vector_type{ x } *= y;
-     }
-     template<typename U, typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator*=(U x) noexcept {
-         return *this *= HIP_vector_type{x};
-     }
-     // vector / vector friend, then /= in the two forms.
-     friend __attribute__((device)) inline constexpr HIP_vector_type operator/(
-         HIP_vector_type x, const HIP_vector_type& y) noexcept
-     {
-         return HIP_vector_type{ x } /= y;
-     }
-     __attribute__((device)) HIP_vector_type& operator/=(const HIP_vector_type& rhs) noexcept {
-         data /= rhs.data;
-         return *this;
-     }
-     template<typename U, typename std::enable_if<std::is_convertible<U, T>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator/=(U x) noexcept {
-         return *this /= HIP_vector_type{x};
-     }
-     // Unary minus; available only when std::is_signed<T> is true.
-     template<typename U = T, typename std::enable_if<std::is_signed<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type operator-() const noexcept {
-         HIP_vector_type negated(*this);
-         negated.data = -negated.data;
-         return negated;
-     }
-     // The remaining operators are available only when std::is_integral<T> is true.
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type operator~() const noexcept {
-         HIP_vector_type flipped{*this};
-         flipped.data = ~flipped.data;
-         return flipped;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator%=(const HIP_vector_type& rhs) noexcept {
-         data %= rhs.data;
-         return *this;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator^=(const HIP_vector_type& rhs) noexcept {
-         data ^= rhs.data;
-         return *this;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator|=(const HIP_vector_type& rhs) noexcept {
-         data |= rhs.data;
-         return *this;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator&=(const HIP_vector_type& rhs) noexcept {
-         data &= rhs.data;
-         return *this;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator>>=(const HIP_vector_type& rhs) noexcept {
-         data >>= rhs.data;
-         return *this;
-     }
-     template<typename U = T, typename std::enable_if<std::is_integral<U>::value>::type* = nullptr>
-     __attribute__((device)) HIP_vector_type& operator<<=(const HIP_vector_type& rhs) noexcept {
-         data <<= rhs.data;
-         return *this;
-     }
- };
- // vector + vector: copies x and applies += y to the copy.
- template<typename T, unsigned int n>
- __attribute__((device)) inline constexpr
- auto operator+(const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} += y;
- }
- // vector + value: y is first turned into a vector, then added to a copy of x.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator+(const HIP_vector_type<T, n>& x, U y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} += HIP_vector_type<T, n>{y};
- }
- // value + vector: x is first turned into a vector, then y is added to it.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator+(U x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} += y;
- }
- // vector - vector: copies x and applies -= y to the copy.
- template<typename T, unsigned int n>
- __attribute__((device)) inline constexpr
- auto operator-(const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} -= y;
- }
- // vector - value: y is first turned into a vector, then subtracted from a copy of x.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator-(const HIP_vector_type<T, n>& x, U y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} -= HIP_vector_type<T, n>{y};
- }
- // value - vector: x is first turned into a vector, then y is subtracted from it.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator-(U x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} -= y;
- }
- // vector * value: y is first turned into a vector, then multiplied into a copy of x.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator*(const HIP_vector_type<T, n>& x, U y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} *= HIP_vector_type<T, n>{y};
- }
- // value * vector: x is first turned into a vector, then multiplied by y.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator*(U x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} *= y;
- }
- // vector / value: y is first turned into a vector, then a copy of x is divided by it.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator/(const HIP_vector_type<T, n>& x, U y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} /= HIP_vector_type<T, n>{y};
- }
- // value / vector: x is first turned into a vector, then divided by y.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator/(U x, const HIP_vector_type<T, n>& y) noexcept
-     -> HIP_vector_type<T, n>
- {
-     return HIP_vector_type<T, n>{x} /= y;
- }
- // Scans x[n], x[n-1], ..., x[0] recursively. Despite the name, the result is
- // true when NO scanned element compares equal to 0 (and also when n == -1,
- // i.e. nothing is left to scan); it is false as soon as a zero element is
- // found. Written as one return expression so it stays C++11-constexpr friendly.
- template<typename V>
- __attribute__((device)) inline constexpr
- bool _hip_any_zero(const V& x, int n) noexcept
- {
-     return (n == -1) || (!(x[n] == 0) && _hip_any_zero(x, n - 1));
- }
- // vector == vector: compares the `data` members element-wise and reports true
- // only if none of the first n comparison results is zero.
- template<typename T, unsigned int n>
- __attribute__((device)) inline constexpr
- auto operator==(const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-     -> bool
- {
-     return _hip_any_zero(x.data == y.data, n - 1);
- }
- // vector == value: y is turned into a vector and compared with x.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator==(const HIP_vector_type<T, n>& x, U y) noexcept -> bool
- {
-     return x == HIP_vector_type<T, n>{y};
- }
- // value == vector: x is turned into a vector and compared with y.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator==(U x, const HIP_vector_type<T, n>& y) noexcept -> bool
- {
-     return HIP_vector_type<T, n>{x} == y;
- }
- // vector != vector: the negation of operator==.
- template<typename T, unsigned int n>
- __attribute__((device)) inline constexpr
- auto operator!=(const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
-     -> bool
- {
-     return !(x == y);
- }
- // vector != value: the negation of the matching operator==.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator!=(const HIP_vector_type<T, n>& x, U y) noexcept -> bool
- {
-     return !(x == y);
- }
- // value != vector: the negation of the matching operator==.
- template<typename T, unsigned int n, typename U>
- __attribute__((device)) inline constexpr
- auto operator!=(U x, const HIP_vector_type<T, n>& y) noexcept -> bool
- {
-     return !(x == y);
- }
- // vector % vector: copies x and applies %= y to the copy.
- // Constrained to integral T. The constraint has to name enable_if<...>::type:
- // a pointer to the enable_if struct itself is a valid type for any T and
- // would never remove this overload from consideration.
- template<
-     typename T,
-     unsigned int n,
-     typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
-     const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
-     return HIP_vector_type<T, n>{x} %= y;
- }
- // vector % value: y is first turned into a vector, then %= is applied to a copy of x.
- // Constrained to integral T. The constraint has to name enable_if<...>::type:
- // a pointer to the enable_if struct itself is a valid type for any T and
- // would never remove this overload from consideration.
- template<
-     typename T,
-     unsigned int n,
-     typename U,
-     typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
-     const HIP_vector_type<T, n>& x, U y) noexcept
- {
-     return HIP_vector_type<T, n>{x} %= HIP_vector_type<T, n>{y};
- }
- // value % vector: x is first turned into a vector, then %= y is applied to it.
- // Constrained to integral T. The constraint has to name enable_if<...>::type:
- // a pointer to the enable_if struct itself is a valid type for any T and
- // would never remove this overload from consideration.
- template<
-     typename T,
-     unsigned int n,
-     typename U,
-     typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator%(
-     U x, const HIP_vector_type<T, n>& y) noexcept
- {
-     return HIP_vector_type<T, n>{x} %= y;
- }
- // vector ^ vector: copies x and applies ^= y to the copy.
- // Constrained to integral T. The constraint has to name enable_if<...>::type:
- // a pointer to the enable_if struct itself is a valid type for any T and
- // would never remove this overload from consideration.
- template<
-     typename T,
-     unsigned int n,
-     typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
-     const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
-     return HIP_vector_type<T, n>{x} ^= y;
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} ^= HIP_vector_type<T, n>{y};
- }
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>{}>* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator^(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} ^= y;
- }
- // Bitwise OR (operator|) overload set for HIP vectors with an integral
- // element type T.
- //
- // The constraint used to be written `typename std::enable_if<C>* = nullptr`,
- // i.e. a pointer to the enable_if struct itself. That type exists even when C
- // is false, so the overloads were never removed for non-integral T. Spelling
- // it `::type*` makes the parameter ill-formed when C is false, so the
- // overloads now drop out of overload resolution as intended.
- //
- // vector | vector: copy x, then apply the vector's operator|=.
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= y;
- }
- // vector | scalar: y is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= HIP_vector_type<T, n>{y};
- }
- // scalar | vector: x is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator|(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} |= y;
- }
- // Bitwise AND (operator&) overload set for HIP vectors with an integral
- // element type T.
- //
- // The constraint used to be written `typename std::enable_if<C>* = nullptr`,
- // i.e. a pointer to the enable_if struct itself. That type exists even when C
- // is false, so the overloads were never removed for non-integral T. Spelling
- // it `::type*` makes the parameter ill-formed when C is false, so the
- // overloads now drop out of overload resolution as intended.
- //
- // vector & vector: copy x, then apply the vector's operator&=.
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= y;
- }
- // vector & scalar: y is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= HIP_vector_type<T, n>{y};
- }
- // scalar & vector: x is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator&(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} &= y;
- }
- // Right shift (operator>>) overload set for HIP vectors with an integral
- // element type T.
- //
- // The constraint used to be written `typename std::enable_if<C>* = nullptr`,
- // i.e. a pointer to the enable_if struct itself. That type exists even when C
- // is false, so the overloads were never removed for non-integral T. Spelling
- // it `::type*` makes the parameter ill-formed when C is false, so the
- // overloads now drop out of overload resolution as intended.
- //
- // vector >> vector: copy x, then apply the vector's operator>>=.
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= y;
- }
- // vector >> scalar: y is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= HIP_vector_type<T, n>{y};
- }
- // scalar >> vector: x is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator>>(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} >>= y;
- }
- // Left shift (operator<<) with the vector on the left-hand side, for HIP
- // vectors with an integral element type T.
- //
- // The constraint used to be written `typename std::enable_if<C>* = nullptr`,
- // i.e. a pointer to the enable_if struct itself. That type exists even when C
- // is false, so the overloads were never removed for non-integral T. Spelling
- // it `::type*` makes the parameter ill-formed when C is false, so the
- // overloads now drop out of overload resolution as intended.
- //
- // vector << vector: copy x, then apply the vector's operator<<=.
- template<
- typename T,
- unsigned int n,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- const HIP_vector_type<T, n>& x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= y;
- }
- // vector << scalar: y is converted to a HIP_vector_type<T, n> first.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- const HIP_vector_type<T, n>& x, U y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= HIP_vector_type<T, n>{y};
- }
- // scalar << vector, for arithmetic scalars U and integral element types T.
- // x is converted to a HIP_vector_type<T, n>, then shifted by y with the
- // vector's operator<<=.
- //
- // Two template-parameter defects are fixed here:
- //  * the is_arithmetic<U> constraint was declared as
- //    `typename std::enable_if<...>::type,` with no `* = nullptr`. That is a
- //    template parameter with no default that can never be deduced from the
- //    call, so this overload could not be selected for any `scalar << vector`
- //    expression.
- //  * the is_integral<T> constraint pointed at the enable_if struct instead of
- //    its `::type`, so it never rejected anything.
- // The is_arithmetic check keeps non-arithmetic left operands (stream objects,
- // for example) from matching this overload.
- template<
- typename T,
- unsigned int n,
- typename U,
- typename std::enable_if<std::is_arithmetic<U>::value>::type* = nullptr,
- typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
- __attribute__((device))
- inline
- constexpr
- HIP_vector_type<T, n> operator<<(
- U x, const HIP_vector_type<T, n>& y) noexcept
- {
- return HIP_vector_type<T, n>{x} <<= y;
- }
- // __hipMapVector<T, rankT>(u): builds a HIP_vector_type<T, rankT> from a
- // HIP_vector_type<U, rankU>. Components present in both are static_cast to T;
- // components the source does not have are filled with T(0). Exactly one
- // overload is enabled for each supported (rankT, rankU) pair.
- //
- // rankT == 1, rankU >= 1: keep x only.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 1 && rankU >= 1), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(static_cast<T>(u.x));
- }
- // rankT == 2, rankU == 1: x is copied, y is zero.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 2 && rankU == 1), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(0));
- }
- // rankT == 2, rankU >= 2: x and y are copied.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 2 && rankU >= 2), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(static_cast<T>(u.x), static_cast<T>(u.y));
- }
- // rankT == 4, rankU == 1: x is copied, y/z/w are zero.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 4 && rankU == 1), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(
- static_cast<T>(u.x), static_cast<T>(0), static_cast<T>(0), static_cast<T>(0));
- }
- // rankT == 4, rankU == 2: x and y are copied, z/w are zero.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 4 && rankU == 2), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(
- static_cast<T>(u.x), static_cast<T>(u.y), static_cast<T>(0), static_cast<T>(0));
- }
- // rankT == 4, rankU == 4: all four components are copied.
- template <typename T, unsigned int rankT, typename U, unsigned int rankU>
- inline __attribute__((always_inline)) __attribute__((device))
- typename std::enable_if<(rankT == 4 && rankU == 4), const HIP_vector_type<T, rankT>>::type
- __hipMapVector(const HIP_vector_type<U, rankU>& u)
- {
- return HIP_vector_type<T, rankT>(
- static_cast<T>(u.x), static_cast<T>(u.y), static_cast<T>(u.z), static_cast<T>(u.w));
- }
- # 1135 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 3
- // Short names for HIP_vector_type<T, N>, N = 1..4, one group per element type.
- // unsigned char
- using uchar1 = HIP_vector_type<unsigned char, 1>;
- using uchar2 = HIP_vector_type<unsigned char, 2>;
- using uchar3 = HIP_vector_type<unsigned char, 3>;
- using uchar4 = HIP_vector_type<unsigned char, 4>;
- // char
- using char1 = HIP_vector_type<char, 1>;
- using char2 = HIP_vector_type<char, 2>;
- using char3 = HIP_vector_type<char, 3>;
- using char4 = HIP_vector_type<char, 4>;
- // unsigned short
- using ushort1 = HIP_vector_type<unsigned short, 1>;
- using ushort2 = HIP_vector_type<unsigned short, 2>;
- using ushort3 = HIP_vector_type<unsigned short, 3>;
- using ushort4 = HIP_vector_type<unsigned short, 4>;
- // short
- using short1 = HIP_vector_type<short, 1>;
- using short2 = HIP_vector_type<short, 2>;
- using short3 = HIP_vector_type<short, 3>;
- using short4 = HIP_vector_type<short, 4>;
- // unsigned int
- using uint1 = HIP_vector_type<unsigned int, 1>;
- using uint2 = HIP_vector_type<unsigned int, 2>;
- using uint3 = HIP_vector_type<unsigned int, 3>;
- using uint4 = HIP_vector_type<unsigned int, 4>;
- // int
- using int1 = HIP_vector_type<int, 1>;
- using int2 = HIP_vector_type<int, 2>;
- using int3 = HIP_vector_type<int, 3>;
- using int4 = HIP_vector_type<int, 4>;
- // unsigned long
- using ulong1 = HIP_vector_type<unsigned long, 1>;
- using ulong2 = HIP_vector_type<unsigned long, 2>;
- using ulong3 = HIP_vector_type<unsigned long, 3>;
- using ulong4 = HIP_vector_type<unsigned long, 4>;
- // long
- using long1 = HIP_vector_type<long, 1>;
- using long2 = HIP_vector_type<long, 2>;
- using long3 = HIP_vector_type<long, 3>;
- using long4 = HIP_vector_type<long, 4>;
- // unsigned long long
- using ulonglong1 = HIP_vector_type<unsigned long long, 1>;
- using ulonglong2 = HIP_vector_type<unsigned long long, 2>;
- using ulonglong3 = HIP_vector_type<unsigned long long, 3>;
- using ulonglong4 = HIP_vector_type<unsigned long long, 4>;
- // long long
- using longlong1 = HIP_vector_type<long long, 1>;
- using longlong2 = HIP_vector_type<long long, 2>;
- using longlong3 = HIP_vector_type<long long, 3>;
- using longlong4 = HIP_vector_type<long long, 4>;
- // float
- using float1 = HIP_vector_type<float, 1>;
- using float2 = HIP_vector_type<float, 2>;
- using float3 = HIP_vector_type<float, 3>;
- using float4 = HIP_vector_type<float, 4>;
- // double
- using double1 = HIP_vector_type<double, 1>;
- using double2 = HIP_vector_type<double, 2>;
- using double3 = HIP_vector_type<double, 3>;
- using double4 = HIP_vector_type<double, 4>;
- # 2117 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_vector_types.h" 3
- // make_<type><N>(...) factories: each one brace-initialises the matching
- // vector type from its arguments, in x, y, z, w order, and returns it.
- // unsigned char
- static inline __attribute__((device)) uchar1 make_uchar1(unsigned char x) { return uchar1{x}; }
- static inline __attribute__((device)) uchar2 make_uchar2(unsigned char x, unsigned char y) { return uchar2{x, y}; }
- static inline __attribute__((device)) uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z) { return uchar3{x, y, z}; }
- static inline __attribute__((device)) uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w) { return uchar4{x, y, z, w}; }
- // char (arguments are signed char)
- static inline __attribute__((device)) char1 make_char1(signed char x) { return char1{x}; }
- static inline __attribute__((device)) char2 make_char2(signed char x, signed char y) { return char2{x, y}; }
- static inline __attribute__((device)) char3 make_char3(signed char x, signed char y, signed char z) { return char3{x, y, z}; }
- static inline __attribute__((device)) char4 make_char4(signed char x, signed char y, signed char z, signed char w) { return char4{x, y, z, w}; }
- // unsigned short
- static inline __attribute__((device)) ushort1 make_ushort1(unsigned short x) { return ushort1{x}; }
- static inline __attribute__((device)) ushort2 make_ushort2(unsigned short x, unsigned short y) { return ushort2{x, y}; }
- static inline __attribute__((device)) ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z) { return ushort3{x, y, z}; }
- static inline __attribute__((device)) ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) { return ushort4{x, y, z, w}; }
- // short
- static inline __attribute__((device)) short1 make_short1(signed short x) { return short1{x}; }
- static inline __attribute__((device)) short2 make_short2(signed short x, signed short y) { return short2{x, y}; }
- static inline __attribute__((device)) short3 make_short3(signed short x, signed short y, signed short z) { return short3{x, y, z}; }
- static inline __attribute__((device)) short4 make_short4(signed short x, signed short y, signed short z, signed short w) { return short4{x, y, z, w}; }
- // unsigned int
- static inline __attribute__((device)) uint1 make_uint1(unsigned int x) { return uint1{x}; }
- static inline __attribute__((device)) uint2 make_uint2(unsigned int x, unsigned int y) { return uint2{x, y}; }
- static inline __attribute__((device)) uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z) { return uint3{x, y, z}; }
- static inline __attribute__((device)) uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w) { return uint4{x, y, z, w}; }
- // int
- static inline __attribute__((device)) int1 make_int1(signed int x) { return int1{x}; }
- static inline __attribute__((device)) int2 make_int2(signed int x, signed int y) { return int2{x, y}; }
- static inline __attribute__((device)) int3 make_int3(signed int x, signed int y, signed int z) { return int3{x, y, z}; }
- static inline __attribute__((device)) int4 make_int4(signed int x, signed int y, signed int z, signed int w) { return int4{x, y, z, w}; }
- // float
- static inline __attribute__((device)) float1 make_float1(float x) { return float1{x}; }
- static inline __attribute__((device)) float2 make_float2(float x, float y) { return float2{x, y}; }
- static inline __attribute__((device)) float3 make_float3(float x, float y, float z) { return float3{x, y, z}; }
- static inline __attribute__((device)) float4 make_float4(float x, float y, float z, float w) { return float4{x, y, z, w}; }
- // double
- static inline __attribute__((device)) double1 make_double1(double x) { return double1{x}; }
- static inline __attribute__((device)) double2 make_double2(double x, double y) { return double2{x, y}; }
- static inline __attribute__((device)) double3 make_double3(double x, double y, double z) { return double3{x, y, z}; }
- static inline __attribute__((device)) double4 make_double4(double x, double y, double z, double w) { return double4{x, y, z, w}; }
- // unsigned long
- static inline __attribute__((device)) ulong1 make_ulong1(unsigned long x) { return ulong1{x}; }
- static inline __attribute__((device)) ulong2 make_ulong2(unsigned long x, unsigned long y) { return ulong2{x, y}; }
- static inline __attribute__((device)) ulong3 make_ulong3(unsigned long x, unsigned long y, unsigned long z) { return ulong3{x, y, z}; }
- static inline __attribute__((device)) ulong4 make_ulong4(unsigned long x, unsigned long y, unsigned long z, unsigned long w) { return ulong4{x, y, z, w}; }
- // long
- static inline __attribute__((device)) long1 make_long1(signed long x) { return long1{x}; }
- static inline __attribute__((device)) long2 make_long2(signed long x, signed long y) { return long2{x, y}; }
- static inline __attribute__((device)) long3 make_long3(signed long x, signed long y, signed long z) { return long3{x, y, z}; }
- static inline __attribute__((device)) long4 make_long4(signed long x, signed long y, signed long z, signed long w) { return long4{x, y, z, w}; }
- // unsigned long long
- static inline __attribute__((device)) ulonglong1 make_ulonglong1(unsigned long long x) { return ulonglong1{x}; }
- static inline __attribute__((device)) ulonglong2 make_ulonglong2(unsigned long long x, unsigned long long y) { return ulonglong2{x, y}; }
- static inline __attribute__((device)) ulonglong3 make_ulonglong3(unsigned long long x, unsigned long long y, unsigned long long z) { return ulonglong3{x, y, z}; }
- static inline __attribute__((device)) ulonglong4 make_ulonglong4(unsigned long long x, unsigned long long y, unsigned long long z, unsigned long long w) { return ulonglong4{x, y, z, w}; }
- // long long
- static inline __attribute__((device)) longlong1 make_longlong1(signed long long x) { return longlong1{x}; }
- static inline __attribute__((device)) longlong2 make_longlong2(signed long long x, signed long long y) { return longlong2{x, y}; }
- static inline __attribute__((device)) longlong3 make_longlong3(signed long long x, signed long long y, signed long long z) { return longlong3{x, y, z}; }
- static inline __attribute__((device)) longlong4 make_longlong4(signed long long x, signed long long y, signed long long z, signed long long w) { return longlong4{x, y, z, w}; }
- # 28 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/hip_ldg.h" 2 3
- // __ldg overload set: every overload simply dereferences ptr and returns the
- // pointed-to value by copy.
- // char family
- static inline __attribute__((device)) char __ldg(const char* ptr) { return *ptr; }
- static inline __attribute__((device)) char2 __ldg(const char2* ptr) { return *ptr; }
- static inline __attribute__((device)) char4 __ldg(const char4* ptr) { return *ptr; }
- static inline __attribute__((device)) signed char __ldg(const signed char* ptr) { return *ptr; }
- static inline __attribute__((device)) unsigned char __ldg(const unsigned char* ptr) { return *ptr; }
- // short family
- static inline __attribute__((device)) short __ldg(const short* ptr) { return *ptr; }
- static inline __attribute__((device)) short2 __ldg(const short2* ptr) { return *ptr; }
- static inline __attribute__((device)) short4 __ldg(const short4* ptr) { return *ptr; }
- static inline __attribute__((device)) unsigned short __ldg(const unsigned short* ptr) { return *ptr; }
- // int family
- static inline __attribute__((device)) int __ldg(const int* ptr) { return *ptr; }
- static inline __attribute__((device)) int2 __ldg(const int2* ptr) { return *ptr; }
- static inline __attribute__((device)) int4 __ldg(const int4* ptr) { return *ptr; }
- static inline __attribute__((device)) unsigned int __ldg(const unsigned int* ptr) { return *ptr; }
- // long / long long family
- static inline __attribute__((device)) long __ldg(const long* ptr) { return *ptr; }
- static inline __attribute__((device)) unsigned long __ldg(const unsigned long* ptr) { return *ptr; }
- static inline __attribute__((device)) long long __ldg(const long long* ptr) { return *ptr; }
- static inline __attribute__((device)) longlong2 __ldg(const longlong2* ptr) { return *ptr; }
- static inline __attribute__((device)) unsigned long long __ldg(const unsigned long long* ptr) { return *ptr; }
- // unsigned vector types
- static inline __attribute__((device)) uchar2 __ldg(const uchar2* ptr) { return *ptr; }
- static inline __attribute__((device)) uchar4 __ldg(const uchar4* ptr) { return *ptr; }
- static inline __attribute__((device)) ushort2 __ldg(const ushort2* ptr) { return *ptr; }
- static inline __attribute__((device)) uint2 __ldg(const uint2* ptr) { return *ptr; }
- static inline __attribute__((device)) uint4 __ldg(const uint4* ptr) { return *ptr; }
- static inline __attribute__((device)) ulonglong2 __ldg(const ulonglong2* ptr) { return *ptr; }
- // floating point
- static inline __attribute__((device)) float __ldg(const float* ptr) { return *ptr; }
- static inline __attribute__((device)) float2 __ldg(const float2* ptr) { return *ptr; }
- static inline __attribute__((device)) float4 __ldg(const float4* ptr) { return *ptr; }
- static inline __attribute__((device)) double __ldg(const double* ptr) { return *ptr; }
- static inline __attribute__((device)) double2 __ldg(const double2* ptr) { return *ptr; }
- # 125 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 2 3
- # 250 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_runtime.h" 3
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
- // Stateless functor: operator()(x) forwards x to __ockl_get_group_id and
- // returns the size_t result converted to a 32-bit unsigned value.
- struct __HIP_BlockIdx {
- __attribute__((device))
- std::uint32_t operator()(std::uint32_t x) const noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_group_id(x));
- }
- };
- // Stateless functor: operator()(x) forwards x to __ockl_get_local_size and
- // returns the size_t result converted to a 32-bit unsigned value.
- struct __HIP_BlockDim {
- __attribute__((device))
- std::uint32_t operator()(std::uint32_t x) const noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_local_size(x));
- }
- };
- // Stateless functor: operator()(x) forwards x to __ockl_get_num_groups and
- // returns the size_t result converted to a 32-bit unsigned value.
- struct __HIP_GridDim {
- __attribute__((device))
- std::uint32_t operator()(std::uint32_t x) const noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_num_groups(x));
- }
- };
- // Stateless functor: operator()(x) forwards x to __ockl_get_local_id and
- // returns the size_t result converted to a 32-bit unsigned value.
- struct __HIP_ThreadIdx {
- __attribute__((device))
- std::uint32_t operator()(std::uint32_t x) const noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_local_id(x));
- }
- };
- // Triple of unsigned 32-bit values (x, y, z). Any component left out of the
- // constructor call defaults to 1.
- typedef struct dim3 {
- uint32_t x;
- uint32_t y;
- uint32_t z;
- constexpr __attribute__((device)) dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1)
- : x(_x), y(_y), z(_z)
- {
- }
- } dim3;
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_global_size(unsigned int);
- // Exposes a functor F (called as F{}(0), F{}(1), F{}(2)) through three
- // static members x, y and z. Nothing is stored: every read of a member
- // invokes F again.
- template <typename F>
- struct __HIP_Coordinates {
- // Value type produced by the functor.
- using R = decltype(F{}(0));
- // Proxy for component 0.
- struct __X {
- __attribute__((device)) operator R() const noexcept
- {
- return F{}(0);
- }
- // Does not modify anything; yields the current value plus rhs.
- __attribute__((device)) R operator+=(const R& rhs)
- {
- return F{}(0) + rhs;
- }
- };
- // Proxy for component 1.
- struct __Y {
- __attribute__((device)) operator R() const noexcept
- {
- return F{}(1);
- }
- // Does not modify anything; yields the current value plus rhs.
- __attribute__((device)) R operator+=(const R& rhs)
- {
- return F{}(1) + rhs;
- }
- };
- // Proxy for component 2.
- struct __Z {
- __attribute__((device)) operator R() const noexcept
- {
- return F{}(2);
- }
- // Does not modify anything; yields the current value plus rhs.
- __attribute__((device)) R operator+=(const R& rhs)
- {
- return F{}(2) + rhs;
- }
- };
- __attribute__((weak)) __attribute__((device)) static constexpr __X x{};
- __attribute__((weak)) __attribute__((device)) static constexpr __Y y{};
- __attribute__((weak)) __attribute__((device)) static constexpr __Z z{};
- // Snapshot of all three components as a dim3.
- __attribute__((device)) operator dim3() const
- {
- return dim3(x, y, z);
- }
- };
- // Out-of-class definitions for the static constexpr members x, y and z of
- // __HIP_Coordinates<F>.
- template <typename F> constexpr typename __HIP_Coordinates<F>::__X __HIP_Coordinates<F>::x;
- template <typename F> constexpr typename __HIP_Coordinates<F>::__Y __HIP_Coordinates<F>::y;
- template <typename F> constexpr typename __HIP_Coordinates<F>::__Z __HIP_Coordinates<F>::z;
- // Products of a grid-dimension proxy and a block-dimension proxy of the same
- // component, in either operand order. The operands are not read; the result
- // is __ockl_get_global_size(i) for component i, converted to 32 bits.
- //
- // component 0 (x)
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_GridDim>::__X, __HIP_Coordinates<__HIP_BlockDim>::__X) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(0));
- }
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_BlockDim>::__X, __HIP_Coordinates<__HIP_GridDim>::__X) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(0));
- }
- // component 1 (y)
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_GridDim>::__Y, __HIP_Coordinates<__HIP_BlockDim>::__Y) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(1));
- }
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_BlockDim>::__Y, __HIP_Coordinates<__HIP_GridDim>::__Y) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(1));
- }
- // component 2 (z)
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_GridDim>::__Z, __HIP_Coordinates<__HIP_BlockDim>::__Z) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(2));
- }
- inline __attribute__((device)) std::uint32_t operator*(
- __HIP_Coordinates<__HIP_BlockDim>::__Z, __HIP_Coordinates<__HIP_GridDim>::__Z) noexcept
- {
- return static_cast<std::uint32_t>(__ockl_get_global_size(2));
- }
- static constexpr __HIP_Coordinates<__HIP_BlockDim> blockDim{}; // .x/.y/.z call __HIP_BlockDim, i.e. __ockl_get_local_size(0/1/2)
- static constexpr __HIP_Coordinates<__HIP_BlockIdx> blockIdx{}; // .x/.y/.z call __HIP_BlockIdx, i.e. __ockl_get_group_id(0/1/2)
- static constexpr __HIP_Coordinates<__HIP_GridDim> gridDim{}; // .x/.y/.z call __HIP_GridDim, i.e. __ockl_get_num_groups(0/1/2)
- static constexpr __HIP_Coordinates<__HIP_ThreadIdx> threadIdx{}; // .x/.y/.z call __HIP_ThreadIdx, i.e. __ockl_get_local_id(0/1/2)
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_id(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_group_id(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_local_size(unsigned int);
- extern "C" __attribute__((device)) __attribute__((const)) size_t __ockl_get_num_groups(unsigned int);
- # 63 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 2 3
- # 73 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_vector_types.h" 1 3
- # 74 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_runtime.h" 2 3
- # 6 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/out/ubuntu-22.04/22.04/build/hip-on-rocclr/hipamd/src/hiprtc/hip_rtc_gen/hipRTC_header.h" 2
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_bfloat16.h" 1 3
- # 37 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_bfloat16.h" 3
- # 1 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h" 1 3
- # 55 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h" 3
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wshadow"
- // 16-bit storage type holding the upper half of a 32-bit float bit pattern.
- // Conversion from float either rounds (default) or truncates (truncate tag);
- // conversion to float places `data` in the upper 16 bits and zero-fills the
- // lower 16.
- struct hip_bfloat16
- {
- // Raw 16-bit pattern.
- __hip_uint16_t data;
- // Tag type selecting the truncating conversions.
- enum truncate_t
- {
- truncate
- };
- __attribute__((device)) hip_bfloat16() = default;
- // Rounding conversion from float.
- explicit __attribute__((device)) hip_bfloat16(float f)
- : data(float_to_bfloat16(f))
- {
- }
- // Truncating conversion from float, selected by the tag argument.
- explicit __attribute__((device)) hip_bfloat16(float f, truncate_t)
- : data(truncate_float_to_bfloat16(f))
- {
- }
- // Widens back to float: data becomes bits 16..31, bits 0..15 are zero.
- __attribute__((device)) operator float() const
- {
- union
- {
- uint32_t int32;
- float fp32;
- } bits = {uint32_t(data) << 16};
- return bits.fp32;
- }
- // Assignment from float uses the rounding conversion.
- __attribute__((device)) hip_bfloat16 &operator=(const float& f)
- {
- data = float_to_bfloat16(f);
- return *this;
- }
- // Named form of the rounding conversion.
- static __attribute__((device)) hip_bfloat16 round_to_bfloat16(float f)
- {
- hip_bfloat16 result;
- result.data = float_to_bfloat16(f);
- return result;
- }
- // Named form of the truncating conversion.
- static __attribute__((device)) hip_bfloat16 round_to_bfloat16(float f, truncate_t)
- {
- hip_bfloat16 result;
- result.data = truncate_float_to_bfloat16(f);
- return result;
- }
- private:
- // Rounding float -> 16-bit pattern.
- static __attribute__((device)) __hip_uint16_t float_to_bfloat16(float f)
- {
- union
- {
- float fp32;
- uint32_t int32;
- } bits = {f};
- if((bits.int32 & 0x7f800000) != 0x7f800000)
- {
- # 136 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h" 3
- // Exponent field is not all ones: add 0x7fff plus the lowest kept bit
- // so that the discarded half rounds to nearest, ties to even.
- bits.int32 += 0x7fff + ((bits.int32 >> 16) & 1);
- }
- else if((bits.int32 & 0xffff) != 0)
- {
- # 148 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h" 3
- // Exponent all ones and some discarded bit set: force bit 16 on so
- // the kept mantissa bits cannot all become zero.
- bits.int32 |= 0x10000;
- }
- return __hip_uint16_t(bits.int32 >> 16);
- }
- // Truncating float -> 16-bit pattern: keeps the upper 16 bits, and sets
- // the lowest kept bit when the exponent is all ones and any discarded
- // bit was set.
- static __attribute__((device)) __hip_uint16_t truncate_float_to_bfloat16(float f)
- {
- union
- {
- float fp32;
- uint32_t int32;
- } bits = {f};
- return __hip_uint16_t(bits.int32 >> 16)
- | (((bits.int32 & 0x7f800000) == 0x7f800000) && ((bits.int32 & 0xffff) != 0));
- }
- };
- #pragma clang diagnostic pop
- typedef struct // member-function-free struct with the same single `__hip_uint16_t data` field that hip_bfloat16 declares
- {
- __hip_uint16_t data;
- } hip_bfloat16_public;
- static_assert(__hip_internal::is_standard_layout<hip_bfloat16>{},
- "hip_bfloat16 is not a standard layout type, and thus is "
- "incompatible with C.");
- static_assert(__hip_internal::is_trivial<hip_bfloat16>{},
- "hip_bfloat16 is not a trivial type, and thus is "
- "incompatible with C.");
- # 189 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/clr/hipamd/include/hip/amd_detail/amd_hip_bfloat16.h" 3
- // Unary plus: returns the operand unchanged.
- inline __attribute__((device)) hip_bfloat16 operator+(hip_bfloat16 a)
- {
- return a;
- }
- // Unary minus: flips bit 15 of the stored pattern.
- inline __attribute__((device)) hip_bfloat16 operator-(hip_bfloat16 a)
- {
- a.data ^= 0x8000;
- return a;
- }
- // Binary arithmetic: both operands are widened to float, the operation is
- // done in float, and the result is converted back with the rounding
- // constructor.
- inline __attribute__((device)) hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float sum = float(a) + float(b);
- return hip_bfloat16(sum);
- }
- inline __attribute__((device)) hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float difference = float(a) - float(b);
- return hip_bfloat16(difference);
- }
- inline __attribute__((device)) hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float product = float(a) * float(b);
- return hip_bfloat16(product);
- }
- inline __attribute__((device)) hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float quotient = float(a) / float(b);
- return hip_bfloat16(quotient);
- }
- // Ordering and equality are decided on the widened float values.
- inline __attribute__((device)) bool operator<(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float lhs = float(a);
- const float rhs = float(b);
- return lhs < rhs;
- }
- inline __attribute__((device)) bool operator==(hip_bfloat16 a, hip_bfloat16 b)
- {
- const float lhs = float(a);
- const float rhs = float(b);
- return lhs == rhs;
- }
- // a > b is b < a with the operands swapped.
- inline __attribute__((device)) bool operator>(hip_bfloat16 a, hip_bfloat16 b)
- {
- return operator<(b, a);
- }
- // a <= b, decided on the widened float values.
- // The earlier `!(a > b)` form returned true whenever either operand was NaN,
- // because `a > b` is false for NaN. Comparing the floats directly yields
- // false in that case, matching how float's own <= behaves.
- inline __attribute__((device)) bool operator<=(hip_bfloat16 a, hip_bfloat16 b)
- {
- return float(a) <= float(b);
- }
- // Inequality is the negation of operator==.
- inline __attribute__((device)) bool operator!=(hip_bfloat16 a, hip_bfloat16 b)
- {
- const bool same = (a == b);
- return !same;
- }
- // a >= b, decided on the widened float values.
- // The earlier `!(a < b)` form returned true whenever either operand was NaN,
- // because `a < b` is false for NaN. Comparing the floats directly yields
- // false in that case, matching how float's own >= behaves.
- inline __attribute__((device)) bool operator>=(hip_bfloat16 a, hip_bfloat16 b)
- {
- return float(a) >= float(b);
- }
- // Compound assignments: compute with the matching binary operator, store the
- // result in a, and return a reference to a.
- inline __attribute__((device)) hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b)
- {
- a = a + b;
- return a;
- }
- inline __attribute__((device)) hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b)
- {
- a = a - b;
- return a;
- }
- inline __attribute__((device)) hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b)
- {
- a = a * b;
- return a;
- }
- inline __attribute__((device)) hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b)
- {
- a = a / b;
- return a;
- }
- // Pre-increment: adds hip_bfloat16(1.0f) in place and returns a.
- inline __attribute__((device)) hip_bfloat16& operator++(hip_bfloat16& a)
- {
- a += hip_bfloat16(1.0f);
- return a;
- }
- // Pre-decrement: subtracts hip_bfloat16(1.0f) in place and returns a.
- inline __attribute__((device)) hip_bfloat16& operator--(hip_bfloat16& a)
- {
- a -= hip_bfloat16(1.0f);
- return a;
- }
- // Post-increment: returns the value a held before the increment.
- inline __attribute__((device)) hip_bfloat16 operator++(hip_bfloat16& a, int)
- {
- const hip_bfloat16 previous = a;
- ++a;
- return previous;
- }
- // Post-decrement: returns the value a held before the decrement.
- inline __attribute__((device)) hip_bfloat16 operator--(hip_bfloat16& a, int)
- {
- const hip_bfloat16 previous = a;
- --a;
- return previous;
- }
- // Bit-pattern classification helpers for hip_bfloat16. 0x7f80 selects bits
- // 7..14 of data, 0x7f selects bits 0..6, and 0x7fff is every bit except
- // bit 15.
- namespace std
- {
- // True when bits 7..14 are all set and bits 0..6 are all clear.
- constexpr __attribute__((device)) bool isinf(hip_bfloat16 a)
- {
- return (a.data & 0x7f80) == 0x7f80 && (a.data & 0x7f) == 0;
- }
- // True when bits 7..14 are all set and at least one of bits 0..6 is set.
- constexpr __attribute__((device)) bool isnan(hip_bfloat16 a)
- {
- return (a.data & 0x7f80) == 0x7f80 && (a.data & 0x7f) != 0;
- }
- // True when every bit other than bit 15 is clear.
- constexpr __attribute__((device)) bool iszero(hip_bfloat16 a)
- {
- return (a.data & 0x7fff) == 0;
- }
- }
- # 38 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/external/hip-on-vdi/include/hip/hip_bfloat16.h" 2 3
- # 7 "/long_pathname_so_that_rpms_can_package_the_debug_info/src/out/ubuntu-22.04/22.04/build/hip-on-rocclr/hipamd/src/hiprtc/hip_rtc_gen/hipRTC_header.h" 2
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wreserved-id-macro"
- #pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
- #pragma clang diagnostic ignored "-Wreserved-macro-identifier"
- #pragma clang diagnostic ignored "-Wundef"
- #define __device__ __attribute__((device))
- #define __host__ __attribute__((host))
- #define __global__ __attribute__((global))
- #define __constant__ __attribute__((constant))
- #define __shared__ __attribute__((shared))
- #define __align__(x) __attribute__((aligned(x)))
- #if !defined(__has_feature) || !__has_feature(cuda_noinline_keyword)
- #define __noinline__ __attribute__((noinline))
- #endif
- #define __forceinline__ inline __attribute__((always_inline))
- #if __HIP_NO_IMAGE_SUPPORT
- #define __hip_img_chk__ __attribute__((unavailable("The image/texture API not supported on the device")))
- #else
- #define __hip_img_chk__
- #endif
- #define launch_bounds_impl0(requiredMaxThreadsPerBlock) \
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock))) // one-argument form: only the flat work-group size range 1..N is set
- #define launch_bounds_impl1(requiredMaxThreadsPerBlock, minBlocksPerMultiprocessor) \
- __attribute__((amdgpu_flat_work_group_size(1, requiredMaxThreadsPerBlock), \
- amdgpu_waves_per_eu(minBlocksPerMultiprocessor))) // two-argument form: the second argument is passed to amdgpu_waves_per_eu
- #define select_impl_(_1, _2, impl_, ...) impl_ // expands to its third argument
- #define __launch_bounds__(...) \
- select_impl_(__VA_ARGS__, launch_bounds_impl1, launch_bounds_impl0)(__VA_ARGS__) // one argument selects launch_bounds_impl0, two select launch_bounds_impl1
- #define HIP_INCLUDE_HIP_HIP_RUNTIME_H
- #define _HIP_BFLOAT16_H_
- #define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H
- #define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H
- #if !__HIP_NO_STD_DEFS__
- #if defined(__HIPRTC_PTRDIFF_T_IS_LONG_LONG__) && __HIPRTC_PTRDIFF_T_IS_LONG_LONG__==1
- typedef long long ptrdiff_t;
- #else
- typedef __PTRDIFF_TYPE__ ptrdiff_t;
- #endif
- typedef long clock_t;
- namespace std {
- using ::ptrdiff_t;
- using ::clock_t;
- }
- #endif // __HIP_NO_STD_DEFS__
- #pragma clang diagnostic pop/*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_HIP_COMMON_H
- #define HIP_INCLUDE_HIP_HIP_COMMON_H
- #if defined(__clang__)
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wreserved-macro-identifier"
- #endif
- // Common code included at the start of every HIP file.
- // Auto-enable __HIP_PLATFORM_AMD__ when compiling with clang in HIP mode (__clang__ and __HIP__ both defined).
- // Other compilers (GCC, ICC, etc.) need to set one of the platform macros explicitly.
- #if defined(__clang__) && defined(__HIP__)
- #ifndef __HIP_PLATFORM_AMD__
- #define __HIP_PLATFORM_AMD__
- #endif
- #endif // defined(__clang__) && defined(__HIP__)
- // Auto-enable __HIP_PLATFORM_NVIDIA__ when compiling with nvcc, or with clang in CUDA (non-HIP) mode.
- #if defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__) && !defined(__HIP__))
- #ifndef __HIP_PLATFORM_NVIDIA__
- #define __HIP_PLATFORM_NVIDIA__
- #endif
- #ifdef __CUDACC__  // a CUDA compilation additionally advertises itself as __HIPCC__
- #define __HIPCC__
- #endif
- #endif // __NVCC__ || (clang in CUDA mode)
- // Auto-enable __HIP_DEVICE_COMPILE__ (set to 1) when either __HCC_ACCELERATOR__ or __CUDA_ARCH__ is defined and non-zero, i.e. on the HCC or NVCC device path.
- #if (defined(__HCC_ACCELERATOR__) && __HCC_ACCELERATOR__ != 0) || \
- (defined(__CUDA_ARCH__) && __CUDA_ARCH__ != 0)
- #define __HIP_DEVICE_COMPILE__ 1
- #endif
- #ifdef __GNUC__  // GNU-compatible compilers: both API macros request default symbol visibility
- #define HIP_PUBLIC_API __attribute__ ((visibility ("default")))
- #define HIP_INTERNAL_EXPORTED_API __attribute__ ((visibility ("default")))
- #else  // any other compiler: both macros expand to nothing
- #define HIP_PUBLIC_API
- #define HIP_INTERNAL_EXPORTED_API
- #endif
- #if __HIP_DEVICE_COMPILE__ == 0  // also true when the macro is undefined (undefined names evaluate to 0 in #if); every feature macro below is then (0)
- // 32-bit Atomics
- #define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (0)
- #define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (0)
- #define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (0)
- #define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (0)
- #define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
- // 64-bit Atomics
- #define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (0)
- #define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (0)
- // Doubles
- #define __HIP_ARCH_HAS_DOUBLES__ (0)
- // Warp cross-lane operations
- #define __HIP_ARCH_HAS_WARP_VOTE__ (0)
- #define __HIP_ARCH_HAS_WARP_BALLOT__ (0)
- #define __HIP_ARCH_HAS_WARP_SHUFFLE__ (0)
- #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0)
- // Sync
- #define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0)
- #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0)
- // Misc
- #define __HIP_ARCH_HAS_SURFACE_FUNCS__ (0)
- #define __HIP_ARCH_HAS_3DGRID__ (0)
- #define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (0)
- #endif // __HIP_DEVICE_COMPILE__ == 0
- #if defined(__clang__)
- #pragma clang diagnostic pop
- #endif
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_LIBRARY_TYPES_H
- #define HIP_INCLUDE_HIP_LIBRARY_TYPES_H
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_common.h>
- #endif
- #if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
- typedef enum hipDataType {  // element-type tags; NOTE(review): R_/C_ prefixes presumably mean real/complex — confirm
- HIP_R_32F = 0,  // values 0..27 are assigned consecutively
- HIP_R_64F = 1,
- HIP_R_16F = 2,
- HIP_R_8I = 3,
- HIP_C_32F = 4,
- HIP_C_64F = 5,
- HIP_C_16F = 6,
- HIP_C_8I = 7,
- HIP_R_8U = 8,
- HIP_C_8U = 9,
- HIP_R_32I = 10,
- HIP_C_32I = 11,
- HIP_R_32U = 12,
- HIP_C_32U = 13,
- HIP_R_16BF = 14,
- HIP_C_16BF = 15,
- HIP_R_4I = 16,
- HIP_C_4I = 17,
- HIP_R_4U = 18,
- HIP_C_4U = 19,
- HIP_R_16I = 20,
- HIP_C_16I = 21,
- HIP_R_16U = 22,
- HIP_C_16U = 23,
- HIP_R_64I = 24,
- HIP_C_64I = 25,
- HIP_R_64U = 26,
- HIP_C_64U = 27,
- // HIP-specific data types (numbered from 1000, leaving a gap after 27)
- HIP_R_8F_E4M3_FNUZ = 1000,
- HIP_R_8F_E5M2_FNUZ = 1001
- } hipDataType;
- typedef enum hipLibraryPropertyType {  // identifiers for the major / minor / patch-level properties
- HIP_LIBRARY_MAJOR_VERSION,  // implicit value 0
- HIP_LIBRARY_MINOR_VERSION,  // implicit value 1
- HIP_LIBRARY_PATCH_LEVEL  // implicit value 2
- } hipLibraryPropertyType;
- #elif !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
- #include "library_types.h"
- #else
- #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
- #endif
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_DRIVER_TYPES_H
- #define HIP_INCLUDE_HIP_DRIVER_TYPES_H
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_common.h>
- #endif
- #if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
- #include "driver_types.h"
- #elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
- #if !defined(__HIPCC_RTC__)
- #ifndef __cplusplus
- #include <stdbool.h>
- #endif
- #endif // !defined(__HIPCC_RTC__)
- typedef void* hipDeviceptr_t;  // untyped pointer alias; used below for the device-pointer fields of the driver-style structs
- typedef enum hipChannelFormatKind {  // numeric class of a channel's components
- hipChannelFormatKindSigned = 0,  // used by the hipCreateChannelDesc helpers for signed integer types
- hipChannelFormatKindUnsigned = 1,  // used for unsigned integer types
- hipChannelFormatKindFloat = 2,  // used for float and the "Half" helpers
- hipChannelFormatKindNone = 3  // used by the unspecialized hipCreateChannelDesc<T>() template
- }hipChannelFormatKind;
- typedef struct hipChannelFormatDesc {  // per-component description of a channel format
- int x;  // NOTE(review): x/y/z/w presumably hold each component's width in bits (the helpers pass sizeof(T) * 8, or 0 for unused components) — confirm
- int y;
- int z;
- int w;
- enum hipChannelFormatKind f;  // signed / unsigned / float / none
- }hipChannelFormatDesc;
- #define HIP_TRSA_OVERRIDE_FORMAT 0x01  // NOTE(review): TRSA/TRSF values look like texture-reference flag bits — confirm against the texture API
- #define HIP_TRSF_READ_AS_INTEGER 0x01
- #define HIP_TRSF_NORMALIZED_COORDINATES 0x02
- #define HIP_TRSF_SRGB 0x10
- typedef struct hipArray* hipArray_t;  // handle to a hipArray; the struct itself is not defined in this section
- typedef const struct hipArray* hipArray_const_t;  // same handle, pointing to a const hipArray
- typedef enum hipArray_Format {  // element format tags used by the array descriptors below
- HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,  // unsigned formats: 0x01..0x03
- HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
- HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
- HIP_AD_FORMAT_SIGNED_INT8 = 0x08,  // signed formats: 0x08..0x0a
- HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
- HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
- HIP_AD_FORMAT_HALF = 0x10,
- HIP_AD_FORMAT_FLOAT = 0x20
- }hipArray_Format;
- typedef struct HIP_ARRAY_DESCRIPTOR {  // 2D array description: extents, element format, channel count
- size_t Width;  // NOTE(review): units (elements vs. bytes) are not stated here — confirm
- size_t Height;
- enum hipArray_Format Format;  // one of the HIP_AD_FORMAT_* values
- unsigned int NumChannels;
- }HIP_ARRAY_DESCRIPTOR;
- typedef struct HIP_ARRAY3D_DESCRIPTOR {  // HIP_ARRAY_DESCRIPTOR extended with Depth and Flags
- size_t Width;
- size_t Height;
- size_t Depth;
- enum hipArray_Format Format;  // one of the HIP_AD_FORMAT_* values
- unsigned int NumChannels;
- unsigned int Flags;  // NOTE(review): accepted flag values are not defined in this section — confirm
- }HIP_ARRAY3D_DESCRIPTOR;
- #if !defined(__HIPCC_RTC__)
- typedef struct hip_Memcpy2D {  // parameter block for a 2D copy: a source group, a destination group, then the copy size
- size_t srcXInBytes;  // source origin: X in bytes, Y per the field name only (units not stated)
- size_t srcY;
- hipMemoryType srcMemoryType;  // hipMemoryType is declared outside this section; NOTE(review): presumably selects which of srcHost/srcDevice/srcArray is used — confirm
- const void* srcHost;
- hipDeviceptr_t srcDevice;
- hipArray_t srcArray;
- size_t srcPitch;
- size_t dstXInBytes;  // destination group mirrors the source group
- size_t dstY;
- hipMemoryType dstMemoryType;
- void* dstHost;
- hipDeviceptr_t dstDevice;
- hipArray_t dstArray;
- size_t dstPitch;
- size_t WidthInBytes;  // size of the copied region
- size_t Height;
- } hip_Memcpy2D;
- #endif // !defined(__HIPCC_RTC__)
- typedef struct hipMipmappedArray {  // bookkeeping record for a mipmapped array
- void* data;  // NOTE(review): ownership and memory space of this pointer are not visible here — confirm
- struct hipChannelFormatDesc desc;  // channel layout of the elements
- unsigned int type;
- unsigned int width;
- unsigned int height;
- unsigned int depth;
- unsigned int min_mipmap_level;
- unsigned int max_mipmap_level;
- unsigned int flags;
- enum hipArray_Format format;  // one of the HIP_AD_FORMAT_* values
- unsigned int num_channels;
- } hipMipmappedArray;
- typedef struct hipMipmappedArray* hipMipmappedArray_t;  // pointer handle to a hipMipmappedArray
- typedef hipMipmappedArray_t hipmipmappedArray;  // alternate spelling of the same handle type
- typedef const struct hipMipmappedArray* hipMipmappedArray_const_t;  // handle to a const hipMipmappedArray
- /**
- * HIP resource types.
- *
- * The four values (0x00..0x03) are numerically identical to the
- * HIP_RESOURCE_TYPE_* constants of HIPresourcetype.
- */
- typedef enum hipResourceType {
- hipResourceTypeArray = 0x00,
- hipResourceTypeMipmappedArray = 0x01,
- hipResourceTypeLinear = 0x02,
- hipResourceTypePitch2D = 0x03
- }hipResourceType;
- typedef enum HIPresourcetype_enum {  // driver-style spelling; same numeric values as hipResourceType
- HIP_RESOURCE_TYPE_ARRAY = 0x00, /**< Array resource */
- HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01, /**< Mipmapped array resource */
- HIP_RESOURCE_TYPE_LINEAR = 0x02, /**< Linear resource */
- HIP_RESOURCE_TYPE_PITCH2D = 0x03 /**< Pitch 2D resource */
- } HIPresourcetype, hipResourcetype;
- /**
- * HIP address modes (wrap, clamp, mirror, border), numbered 0..3.
- * Used for the addressMode[] entries of HIP_TEXTURE_DESC.
- */
- typedef enum HIPaddress_mode_enum {
- HIP_TR_ADDRESS_MODE_WRAP = 0,
- HIP_TR_ADDRESS_MODE_CLAMP = 1,
- HIP_TR_ADDRESS_MODE_MIRROR = 2,
- HIP_TR_ADDRESS_MODE_BORDER = 3
- } HIPaddress_mode;
- /**
- * HIP filter modes (point = 0, linear = 1).
- * Used for both filterMode and mipmapFilterMode of HIP_TEXTURE_DESC.
- */
- typedef enum HIPfilter_mode_enum {
- HIP_TR_FILTER_MODE_POINT = 0,
- HIP_TR_FILTER_MODE_LINEAR = 1
- } HIPfilter_mode;
- /**
- * Texture descriptor.
- *
- * Groups the addressing, filtering and mipmap settings of a texture.
- * The trailing reserved[12] array carries no documented meaning here.
- */
- typedef struct HIP_TEXTURE_DESC_st {
- HIPaddress_mode addressMode[3]; /**< Address modes (three entries; presumably one per dimension) */
- HIPfilter_mode filterMode; /**< Filter mode */
- unsigned int flags; /**< Flags */
- unsigned int maxAnisotropy; /**< Maximum anisotropy ratio */
- HIPfilter_mode mipmapFilterMode; /**< Mipmap filter mode */
- float mipmapLevelBias; /**< Mipmap level bias */
- float minMipmapLevelClamp; /**< Mipmap minimum level clamp */
- float maxMipmapLevelClamp; /**< Mipmap maximum level clamp */
- float borderColor[4]; /**< Border Color (four floats) */
- int reserved[12];
- } HIP_TEXTURE_DESC;
- /**
- * HIP texture resource view formats.
- *
- * Values are assigned consecutively from 0x00 to 0x22 and correspond,
- * value for value, to the HIP_RES_VIEW_FORMAT_* constants of
- * HIPresourceViewFormat.
- */
- typedef enum hipResourceViewFormat {
- hipResViewFormatNone = 0x00,
- hipResViewFormatUnsignedChar1 = 0x01,
- hipResViewFormatUnsignedChar2 = 0x02,
- hipResViewFormatUnsignedChar4 = 0x03,
- hipResViewFormatSignedChar1 = 0x04,
- hipResViewFormatSignedChar2 = 0x05,
- hipResViewFormatSignedChar4 = 0x06,
- hipResViewFormatUnsignedShort1 = 0x07,
- hipResViewFormatUnsignedShort2 = 0x08,
- hipResViewFormatUnsignedShort4 = 0x09,
- hipResViewFormatSignedShort1 = 0x0a,
- hipResViewFormatSignedShort2 = 0x0b,
- hipResViewFormatSignedShort4 = 0x0c,
- hipResViewFormatUnsignedInt1 = 0x0d,
- hipResViewFormatUnsignedInt2 = 0x0e,
- hipResViewFormatUnsignedInt4 = 0x0f,
- hipResViewFormatSignedInt1 = 0x10,
- hipResViewFormatSignedInt2 = 0x11,
- hipResViewFormatSignedInt4 = 0x12,
- hipResViewFormatHalf1 = 0x13,
- hipResViewFormatHalf2 = 0x14,
- hipResViewFormatHalf4 = 0x15,
- hipResViewFormatFloat1 = 0x16,
- hipResViewFormatFloat2 = 0x17,
- hipResViewFormatFloat4 = 0x18,
- hipResViewFormatUnsignedBlockCompressed1 = 0x19,
- hipResViewFormatUnsignedBlockCompressed2 = 0x1a,
- hipResViewFormatUnsignedBlockCompressed3 = 0x1b,
- hipResViewFormatUnsignedBlockCompressed4 = 0x1c,
- hipResViewFormatSignedBlockCompressed4 = 0x1d,
- hipResViewFormatUnsignedBlockCompressed5 = 0x1e,
- hipResViewFormatSignedBlockCompressed5 = 0x1f,
- hipResViewFormatUnsignedBlockCompressed6H = 0x20,
- hipResViewFormatSignedBlockCompressed6H = 0x21,
- hipResViewFormatUnsignedBlockCompressed7 = 0x22
- }hipResourceViewFormat;
- typedef enum HIPresourceViewFormat_enum  // driver-style spelling; same numeric values (0x00..0x22) as hipResourceViewFormat
- {
- HIP_RES_VIEW_FORMAT_NONE = 0x00, /**< No resource view format (use underlying resource format) */
- HIP_RES_VIEW_FORMAT_UINT_1X8 = 0x01, /**< 1 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X8 = 0x02, /**< 2 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X8 = 0x03, /**< 4 channel unsigned 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X8 = 0x04, /**< 1 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X8 = 0x05, /**< 2 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X8 = 0x06, /**< 4 channel signed 8-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_1X16 = 0x07, /**< 1 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X16 = 0x08, /**< 2 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X16 = 0x09, /**< 4 channel unsigned 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X16 = 0x0a, /**< 1 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X16 = 0x0b, /**< 2 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X16 = 0x0c, /**< 4 channel signed 16-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_1X32 = 0x0d, /**< 1 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_2X32 = 0x0e, /**< 2 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_UINT_4X32 = 0x0f, /**< 4 channel unsigned 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_1X32 = 0x10, /**< 1 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_2X32 = 0x11, /**< 2 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_SINT_4X32 = 0x12, /**< 4 channel signed 32-bit integers */
- HIP_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13, /**< 1 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14, /**< 2 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15, /**< 4 channel 16-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16, /**< 1 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17, /**< 2 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18, /**< 4 channel 32-bit floating point */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19, /**< Block compressed 1 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a, /**< Block compressed 2 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b, /**< Block compressed 3 */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c, /**< Block compressed 4 unsigned */
- HIP_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d, /**< Block compressed 4 signed */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e, /**< Block compressed 5 unsigned */
- HIP_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f, /**< Block compressed 5 signed */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20, /**< Block compressed 6 unsigned half-float */
- HIP_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21, /**< Block compressed 6 signed half-float */
- HIP_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22 /**< Block compressed 7 */
- } HIPresourceViewFormat;
- /**
- * HIP resource descriptor.
- *
- * A type tag (resType) followed by a union `res` with one member per
- * hipResourceType value: array, mipmap, linear and pitch2D.
- * NOTE(review): presumably resType tells which union member is valid — confirm.
- */
- typedef struct hipResourceDesc {
- enum hipResourceType resType;
- union {
- struct {
- hipArray_t array;
- } array;
- struct {
- hipMipmappedArray_t mipmap;
- } mipmap;
- struct {
- void* devPtr;
- struct hipChannelFormatDesc desc;  // channel layout of the linear memory
- size_t sizeInBytes;
- } linear;
- struct {
- void* devPtr;
- struct hipChannelFormatDesc desc;  // channel layout of the pitched memory
- size_t width;
- size_t height;
- size_t pitchInBytes;
- } pitch2D;
- } res;
- }hipResourceDesc;
- typedef struct HIP_RESOURCE_DESC_st  // driver-style resource descriptor: type tag, per-type union, flags
- {
- HIPresourcetype resType; /**< Resource type */
- union {
- struct {
- hipArray_t hArray; /**< HIP array */
- } array;
- struct {
- hipMipmappedArray_t hMipmappedArray; /**< HIP mipmapped array */
- } mipmap;
- struct {
- hipDeviceptr_t devPtr; /**< Device pointer */
- hipArray_Format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t sizeInBytes; /**< Size in bytes */
- } linear;
- struct {
- hipDeviceptr_t devPtr; /**< Device pointer */
- hipArray_Format format; /**< Array format */
- unsigned int numChannels; /**< Channels per array element */
- size_t width; /**< Width of the array in elements */
- size_t height; /**< Height of the array in elements */
- size_t pitchInBytes; /**< Pitch between two rows in bytes */
- } pitch2D;
- struct {
- int reserved[32];  // 32-int member; as part of the union it sets a lower bound on the union's size
- } reserved;
- } res;
- unsigned int flags; /**< Flags (must be zero) */
- } HIP_RESOURCE_DESC;
- /**
- * HIP resource view descriptor.
- *
- * Holds a view format, the view's width/height/depth, and the first/last
- * mipmap level and layer. Declared as a plain struct tag (no typedef), so
- * C code must refer to it as `struct hipResourceViewDesc`.
- */
- struct hipResourceViewDesc {
- enum hipResourceViewFormat format;
- size_t width;
- size_t height;
- size_t depth;
- unsigned int firstMipmapLevel;
- unsigned int lastMipmapLevel;
- unsigned int firstLayer;
- unsigned int lastLayer;
- };
- /**
- * Resource view descriptor (driver-style).
- *
- * Same fields, in the same order, as struct hipResourceViewDesc, but with the
- * HIPresourceViewFormat enum for `format` and a trailing reserved[16] array.
- */
- typedef struct HIP_RESOURCE_VIEW_DESC_st
- {
- HIPresourceViewFormat format; /**< Resource view format */
- size_t width; /**< Width of the resource view */
- size_t height; /**< Height of the resource view */
- size_t depth; /**< Depth of the resource view */
- unsigned int firstMipmapLevel; /**< First defined mipmap level */
- unsigned int lastMipmapLevel; /**< Last defined mipmap level */
- unsigned int firstLayer; /**< First layer index */
- unsigned int lastLayer; /**< Last layer index */
- unsigned int reserved[16];
- } HIP_RESOURCE_VIEW_DESC;
- /**
- * Memory copy types
- *
- */
- #if !defined(__HIPCC_RTC__)
- typedef enum hipMemcpyKind {  // direction of a memory copy; stored in hipMemcpy3DParms::kind
- hipMemcpyHostToHost = 0, ///< Host-to-Host Copy
- hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy
- hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy
- hipMemcpyDeviceToDevice = 3, ///< Device-to-Device Copy
- hipMemcpyDefault =
- 4 ///< Runtime will automatically determine the copy kind based on virtual addresses.
- } hipMemcpyKind;
- typedef struct hipPitchedPtr {  // pointer plus pitch and x/y sizes; built by make_hipPitchedPtr()
- void* ptr;
- size_t pitch;  // NOTE(review): presumably the row pitch in bytes — confirm
- size_t xsize;
- size_t ysize;
- }hipPitchedPtr;
- typedef struct hipExtent {  // 3D size; built by make_hipExtent()
- size_t width; // Width in elements when referring to array memory, in bytes when referring to
- // linear memory
- size_t height;
- size_t depth;
- }hipExtent;
- typedef struct hipPos {  // 3D position (x, y, z); built by make_hipPos()
- size_t x;
- size_t y;
- size_t z;
- }hipPos;
- typedef struct hipMemcpy3DParms {  // parameter block for a 3D copy: source triple, destination triple, extent, kind
- hipArray_t srcArray;  // NOTE(review): presumably either srcArray or srcPtr names the source — confirm
- struct hipPos srcPos;
- struct hipPitchedPtr srcPtr;
- hipArray_t dstArray;  // destination triple mirrors the source triple
- struct hipPos dstPos;
- struct hipPitchedPtr dstPtr;
- struct hipExtent extent;  // size of the copied region
- enum hipMemcpyKind kind;  // copy direction
- } hipMemcpy3DParms;
- typedef struct HIP_MEMCPY3D {  // driver-style 3D copy block: hip_Memcpy2D's fields plus Z, LOD, per-side height and Depth
- size_t srcXInBytes;  // source origin
- size_t srcY;
- size_t srcZ;
- size_t srcLOD;
- hipMemoryType srcMemoryType;  // hipMemoryType is declared outside this section
- const void* srcHost;
- hipDeviceptr_t srcDevice;
- hipArray_t srcArray;
- size_t srcPitch;
- size_t srcHeight;
- size_t dstXInBytes;  // destination group mirrors the source group
- size_t dstY;
- size_t dstZ;
- size_t dstLOD;
- hipMemoryType dstMemoryType;
- void* dstHost;
- hipDeviceptr_t dstDevice;
- hipArray_t dstArray;
- size_t dstPitch;
- size_t dstHeight;
- size_t WidthInBytes;  // size of the copied region
- size_t Height;
- size_t Depth;
- } HIP_MEMCPY3D;
- /* Build a hipPitchedPtr by value: d -> ptr, p -> pitch, xsz -> xsize, ysz -> ysize. */
- static inline struct hipPitchedPtr make_hipPitchedPtr(void* d, size_t p, size_t xsz,
-                                                       size_t ysz) {
-   /* Positional initializer follows the member order ptr, pitch, xsize, ysize. */
-   struct hipPitchedPtr pitched = {d, p, xsz, ysz};
-   return pitched;
- }
- /* Build a hipPos by value from its three coordinates. */
- static inline struct hipPos make_hipPos(size_t x, size_t y, size_t z) {
-   /* Positional initializer follows the member order x, y, z. */
-   struct hipPos pos = {x, y, z};
-   return pos;
- }
- /* Build a hipExtent by value: w -> width, h -> height, d -> depth. */
- static inline struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) {
-   /* Positional initializer follows the member order width, height, depth. */
-   struct hipExtent extent = {w, h, d};
-   return extent;
- }
- typedef enum hipFunction_attribute {  // function attribute selectors; values are implicit, starting at 0
- HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
- HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_NUM_REGS,
- HIP_FUNC_ATTRIBUTE_PTX_VERSION,
- HIP_FUNC_ATTRIBUTE_BINARY_VERSION,
- HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,
- HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
- HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT,
- HIP_FUNC_ATTRIBUTE_MAX  // last entry: evaluates to 10, the number of attributes listed above it
- } hipFunction_attribute;
- typedef enum hipPointer_attribute {  // pointer attribute selectors; numbering starts at 1 and continues implicitly
- HIP_POINTER_ATTRIBUTE_CONTEXT = 1, ///< The context on which a pointer was allocated
- ///< @warning - not supported in HIP
- HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, ///< memory type describing location of a pointer
- HIP_POINTER_ATTRIBUTE_DEVICE_POINTER,///< address at which the pointer is allocated on device
- HIP_POINTER_ATTRIBUTE_HOST_POINTER, ///< address at which the pointer is allocated on host
- HIP_POINTER_ATTRIBUTE_P2P_TOKENS, ///< A pair of tokens for use with linux kernel interface
- ///< @warning - not supported in HIP
- HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, ///< Synchronize every synchronous memory operation
- ///< initiated on this region
- HIP_POINTER_ATTRIBUTE_BUFFER_ID, ///< Unique ID for an allocated memory region
- HIP_POINTER_ATTRIBUTE_IS_MANAGED, ///< Indicates if the pointer points to managed memory
- HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL,///< device ordinal of a device on which a pointer
- ///< was allocated or registered
- HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE, ///< if this pointer maps to an allocation
- ///< that is suitable for hipIpcGetMemHandle
- ///< @warning - not supported in HIP
- HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR,///< Starting address for this requested pointer
- HIP_POINTER_ATTRIBUTE_RANGE_SIZE, ///< Size of the address range for this requested pointer
- HIP_POINTER_ATTRIBUTE_MAPPED, ///< tells if this pointer is in a valid address range
- ///< that is mapped to a backing allocation
- HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,///< Bitmask of allowed hipMemAllocationHandleType
- ///< for this allocation @warning - not supported in HIP
- HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE, ///< returns if the memory referenced by
- ///< this pointer can be used with the GPUDirect RDMA API
- ///< @warning - not supported in HIP
- HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS, ///< Returns the access flags the device associated with
- ///< for the corresponding memory referenced by the ptr
- HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE ///< Returns the mempool handle for the allocation if
- ///< it was allocated from a mempool
- ///< @warning - not supported in HIP
- } hipPointer_attribute;
- #endif // !defined(__HIPCC_RTC__)
- #else
- #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
- #endif
- #endif
- /*
- Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- /**
- * @file surface_types.h
- * @brief Defines surface types for HIP runtime.
- */
- #ifndef HIP_INCLUDE_HIP_SURFACE_TYPES_H
- #define HIP_INCLUDE_HIP_SURFACE_TYPES_H
- #if defined(__clang__)
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wreserved-identifier"
- #endif
- #if !defined(__HIPCC_RTC__)
- #include <hip/driver_types.h>
- #endif
- /**
- * An opaque value that represents a HIP surface object.
- *
- * __hip_surface is only forward-declared here, so the handle cannot be
- * dereferenced by code that sees just this header section.
- */
- struct __hip_surface;
- typedef struct __hip_surface* hipSurfaceObject_t;
- /**
- * HIP surface reference: a struct wrapping a single hipSurfaceObject_t handle.
- */
- struct surfaceReference {
- hipSurfaceObject_t surfaceObject;
- };
- /**
- * HIP surface boundary modes (zero = 0, trap = 1, clamp = 2).
- * NOTE(review): presumably selects out-of-range access handling — confirm against the surface API.
- */
- enum hipSurfaceBoundaryMode {
- hipBoundaryModeZero = 0,
- hipBoundaryModeTrap = 1,
- hipBoundaryModeClamp = 2
- };
- #if defined(__clang__)
- #pragma clang diagnostic pop
- #endif
- #endif /* !HIP_INCLUDE_HIP_SURFACE_TYPES_H */
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_common.h>
- #include <hip/driver_types.h>
- #include <hip/amd_detail/amd_hip_vector_types.h>
- #endif
- #ifdef __cplusplus
- extern "C" HIP_PUBLIC_API  // C linkage, exported; defined outside this header section
- hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f);  // the inline helpers below all forward to this overload
- // Single-component "half" descriptor: x is the bit width of unsigned short
- // (sizeof * 8), y/z/w are 0, kind is float. Same result as hipCreateChannelDescHalf1().
- static inline hipChannelFormatDesc hipCreateChannelDescHalf() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindFloat);
- }
- // One-component "half" descriptor: x is the bit width of unsigned short
- // (sizeof * 8), y/z/w are 0, kind is float.
- static inline hipChannelFormatDesc hipCreateChannelDescHalf1() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindFloat);
- }
- // Two-component "half" descriptor: x and y are the bit width of unsigned short
- // (sizeof * 8), z/w are 0, kind is float.
- static inline hipChannelFormatDesc hipCreateChannelDescHalf2() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindFloat);
- }
- // Four-component "half" descriptor: x, y, z and w are all the bit width of
- // unsigned short (sizeof * 8), kind is float.
- static inline hipChannelFormatDesc hipCreateChannelDescHalf4() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindFloat);
- }
- // Primary template: used for any T without an explicit specialization.
- // Produces an all-zero descriptor tagged hipChannelFormatKindNone.
- template <typename T>
- static inline hipChannelFormatDesc hipCreateChannelDesc() {
-   const hipChannelFormatKind kind = hipChannelFormatKindNone;
-   return hipCreateChannelDesc(0, 0, 0, 0, kind);
- }
- // char: x = sizeof(char) * 8 bits, y/z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<char>() {
-   const int bits = static_cast<int>(sizeof(char)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // signed char: x = sizeof(signed char) * 8 bits, y/z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<signed char>() {
-   const int bits = static_cast<int>(sizeof(signed char)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // unsigned char: x = sizeof(unsigned char) * 8 bits, y/z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<unsigned char>() {
-   const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // uchar1: one component sized like unsigned char (sizeof * 8 bits), unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uchar1>() {
-   const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // char1: one component sized like signed char (sizeof * 8 bits), signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<char1>() {
-   const int bits = static_cast<int>(sizeof(signed char)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // uchar2: x and y sized like unsigned char (sizeof * 8 bits), z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uchar2>() {
-   const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindUnsigned);
- }
- // char2: x and y sized like signed char (sizeof * 8 bits), z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<char2>() {
-   const int bits = static_cast<int>(sizeof(signed char)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindSigned);
- }
- #ifndef __GNUC__ // vector3 is the same as vector4
- // uchar3: x, y and z sized like unsigned char (sizeof * 8 bits), w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uchar3>() {
-   const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindUnsigned);
- }
- // char3: x, y and z sized like signed char (sizeof * 8 bits), w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<char3>() {
-   const int bits = static_cast<int>(sizeof(signed char)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindSigned);
- }
- #endif
- // uchar4: all four components sized like unsigned char (sizeof * 8 bits), unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uchar4>() {
-   const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindUnsigned);
- }
- // char4: all four components sized like signed char (sizeof * 8 bits), signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<char4>() {
-   const int bits = static_cast<int>(sizeof(signed char)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindSigned);
- }
- // unsigned short: x = sizeof(unsigned short) * 8 bits, y/z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<unsigned short>() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // signed short: x = sizeof(signed short) * 8 bits, y/z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<signed short>() {
-   const int bits = static_cast<int>(sizeof(signed short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // ushort1: one component sized like unsigned short (sizeof * 8 bits), unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ushort1>() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // short1: one component sized like signed short (sizeof * 8 bits), signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<short1>() {
-   const int bits = static_cast<int>(sizeof(signed short)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // ushort2: x and y sized like unsigned short (sizeof * 8 bits), z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ushort2>() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindUnsigned);
- }
- // short2: x and y sized like signed short (sizeof * 8 bits), z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<short2>() {
-   const int bits = static_cast<int>(sizeof(signed short)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindSigned);
- }
- #ifndef __GNUC__
- // ushort3: x, y and z sized like unsigned short (sizeof * 8 bits), w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ushort3>() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindUnsigned);
- }
- // short3: x, y and z sized like signed short (sizeof * 8 bits), w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<short3>() {
-   const int bits = static_cast<int>(sizeof(signed short)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindSigned);
- }
- #endif
- // ushort4: all four components sized like unsigned short (sizeof * 8 bits), unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ushort4>() {
-   const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindUnsigned);
- }
- // short4: all four components sized like signed short (sizeof * 8 bits), signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<short4>() {
-   const int bits = static_cast<int>(sizeof(signed short)) * 8;
-   return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindSigned);
- }
- // unsigned int: x = sizeof(unsigned int) * 8 bits, y/z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<unsigned int>() {
-   const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // signed int: x = sizeof(signed int) * 8 bits, y/z/w = 0, signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<signed int>() {
-   const int bits = static_cast<int>(sizeof(signed int)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // uint1: one component sized like unsigned int (sizeof * 8 bits), unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uint1>() {
-   const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- // int1: one component sized like signed int (sizeof * 8 bits), signed kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<int1>() {
-   const int bits = static_cast<int>(sizeof(signed int)) * 8;
-   return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- // uint2: x and y sized like unsigned int (sizeof * 8 bits), z/w = 0, unsigned kind.
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uint2>() {
-   const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
-   return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<int2>() {
- // x and y carry the bit width of signed int; z and w are left at 0.
- const int bits = static_cast<int>(sizeof(signed int)) * 8;
- return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindSigned);
- }
- #ifndef __GNUC__
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uint3>() {
- // x, y and z carry the bit width of unsigned int; w is left at 0.
- const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<int3>() {
- // x, y and z carry the bit width of signed int; w is left at 0.
- const int bits = static_cast<int>(sizeof(signed int)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindSigned);
- }
- #endif
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<uint4>() {
- // All four channels carry the bit width of unsigned int.
- const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<int4>() {
- // All four channels carry the bit width of signed int.
- const int bits = static_cast<int>(sizeof(signed int)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindSigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<float>() {
- // Only x is populated, with the bit width of float.
- const int bits = static_cast<int>(sizeof(float)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindFloat);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<float1>() {
- // Same descriptor as the plain float specialisation: x only.
- const int bits = static_cast<int>(sizeof(float)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindFloat);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<float2>() {
- // x and y carry the bit width of float; z and w are left at 0.
- const int bits = static_cast<int>(sizeof(float)) * 8;
- return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindFloat);
- }
- #ifndef __GNUC__
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<float3>() {
- // x, y and z carry the bit width of float; w is left at 0.
- const int bits = static_cast<int>(sizeof(float)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindFloat);
- }
- #endif
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<float4>() {
- // All four channels carry the bit width of float.
- const int bits = static_cast<int>(sizeof(float)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindFloat);
- }
- #if !defined(__LP64__)
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<unsigned long>() {
- // Only x is populated, with the bit width of unsigned long.
- const int bits = static_cast<int>(sizeof(unsigned long)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<signed long>() {
- // Only x is populated, with the bit width of signed long.
- const int bits = static_cast<int>(sizeof(signed long)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ulong1>() {
- // Same descriptor as the plain unsigned long specialisation: x only.
- const int bits = static_cast<int>(sizeof(unsigned long)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<long1>() {
- // Same descriptor as the plain signed long specialisation: x only.
- const int bits = static_cast<int>(sizeof(signed long)) * 8;
- return hipCreateChannelDesc(bits, 0, 0, 0, hipChannelFormatKindSigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ulong2>() {
- // x and y carry the bit width of unsigned long; z and w are left at 0.
- const int bits = static_cast<int>(sizeof(unsigned long)) * 8;
- return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<long2>() {
- // x and y carry the bit width of signed long; z and w are left at 0.
- const int bits = static_cast<int>(sizeof(signed long)) * 8;
- return hipCreateChannelDesc(bits, bits, 0, 0, hipChannelFormatKindSigned);
- }
- #ifndef __GNUC__
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ulong3>() {
- // x, y and z carry the bit width of unsigned long; w is left at 0.
- const int bits = static_cast<int>(sizeof(unsigned long)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<long3>() {
- // x, y and z carry the bit width of signed long; w is left at 0.
- const int bits = static_cast<int>(sizeof(signed long)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, 0, hipChannelFormatKindSigned);
- }
- #endif
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<ulong4>() {
- // All four channels carry the bit width of unsigned long.
- const int bits = static_cast<int>(sizeof(unsigned long)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindUnsigned);
- }
- template <>
- inline hipChannelFormatDesc hipCreateChannelDesc<long4>() {
- // All four channels carry the bit width of signed long.
- const int bits = static_cast<int>(sizeof(signed long)) * 8;
- return hipCreateChannelDesc(bits, bits, bits, bits, hipChannelFormatKindSigned);
- }
- #endif /* !__LP64__ */
- #else
- struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, // non-C++ branch: plain declaration; the C++ callers in this header pass per-channel bit widths for x, y, z, w
- enum hipChannelFormatKind f); // f: channel kind (the C++ callers in this header pass Signed, Unsigned or Float)
- #endif /* __cplusplus */
- #endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_TEXTURE_TYPES_H
- #define HIP_INCLUDE_HIP_TEXTURE_TYPES_H
- #if defined(__clang__)
- #pragma clang diagnostic push
- #pragma clang diagnostic ignored "-Wreserved-identifier"
- #pragma clang diagnostic ignored "-Wreserved-macro-identifier"
- #pragma clang diagnostic ignored "-Wc++98-compat"
- #endif
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_common.h>
- #endif
- #if !defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
- #include "texture_types.h"
- #elif defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
- /*******************************************************************************
- * *
- * *
- * *
- *******************************************************************************/
- #if !defined(__HIPCC_RTC__)
- #include <limits.h>
- #include <hip/channel_descriptor.h>
- #include <hip/driver_types.h>
- #endif // !defined(__HIPCC_RTC__)
- #define hipTextureType1D 0x01 // default texType of texture<>; expected by tex1Dfetch, tex1D and tex1DLod
- #define hipTextureType2D 0x02 // texType expected by tex2D and tex2DLod
- #define hipTextureType3D 0x03 // texType expected by tex3D and tex3DLod
- #define hipTextureTypeCubemap 0x0C // texType expected by texCubemap, texCubemapLod and texCubemapGrad
- #define hipTextureType1DLayered 0xF1 // texType expected by tex1DLayered and tex1DLayeredLod
- #define hipTextureType2DLayered 0xF2 // texType expected by tex2DLayered and tex2DLayeredLod
- #define hipTextureTypeCubemapLayered 0xFC // texType expected by texCubemapLayered and texCubemapLayeredLod
- /**
- * Should be the same as HSA_IMAGE_OBJECT_SIZE_DWORD / HSA_SAMPLER_OBJECT_SIZE_DWORD.
- */
- #define HIP_IMAGE_OBJECT_SIZE_DWORD 12 // size of the image part, in dwords
- #define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 // size of the sampler part, in dwords
- #define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD // the sampler part starts right after the image part
- #define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) // image + sampler = 20 dwords
- /**
- * An opaque value that represents a hip texture object
- */
- struct __hip_texture; // declared only; this header uses it solely through pointers
- typedef struct __hip_texture* hipTextureObject_t; // handle type held in textureReference::textureObject
- /**
- * hip texture address modes
- */
- enum hipTextureAddressMode { // stored per dimension in the addressMode[3] arrays of textureReference and hipTextureDesc
- hipAddressModeWrap = 0, // NOTE(review): presumably repeats out-of-range coordinates; confirm against the runtime docs
- hipAddressModeClamp = 1, // default aMode of the texture<> constructor
- hipAddressModeMirror = 2, // NOTE(review): presumably mirrors out-of-range coordinates; confirm against the runtime docs
- hipAddressModeBorder = 3 // NOTE(review): presumably pairs with hipTextureDesc::borderColor; confirm
- };
- /**
- * hip texture filter modes, used for both filterMode and mipmapFilterMode
- */
- enum hipTextureFilterMode {
- // Default fMode of the texture<> constructor.
- hipFilterModePoint = 0,
- hipFilterModeLinear = 1
- };
- /**
- * hip texture read modes; they select the return type of the texture fetch functions
- */
- enum hipTextureReadMode {
- // Fetches return the texel's own channel type (default mode of texture<>).
- hipReadModeElementType = 0,
- // Fetches of char/short based texels return float or a float vector.
- hipReadModeNormalizedFloat = 1
- };
- /**
- * hip texture reference: plain C struct that the texture<> template derives from
- */
- typedef struct textureReference {
- int normalized; // set from the `norm` argument of the texture<> constructors
- enum hipTextureReadMode readMode;// used only for driver APIs
- enum hipTextureFilterMode filterMode; // set from the `fMode` argument of the texture<> constructors
- enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
- struct hipChannelFormatDesc channelDesc; // channel layout; texture<> derives it from T or takes it as an argument
- int sRGB; // Perform sRGB->linear conversion during texture read
- unsigned int maxAnisotropy; // Limit to the anisotropy ratio
- enum hipTextureFilterMode mipmapFilterMode;
- float mipmapLevelBias;
- float minMipmapLevelClamp;
- float maxMipmapLevelClamp;
- hipTextureObject_t textureObject; // nullptr after construction; the texture fetch helpers read the image/sampler words through it
- int numChannels; // not written by the texture<> constructors
- enum hipArray_Format format; // not written by the texture<> constructors
- }textureReference;
- /**
- * hip texture descriptor: sampling parameters only (no channel format or texture object handle)
- */
- typedef struct hipTextureDesc {
- enum hipTextureAddressMode addressMode[3]; // Texture address mode for up to 3 dimensions
- enum hipTextureFilterMode filterMode;
- enum hipTextureReadMode readMode;
- int sRGB; // Perform sRGB->linear conversion during texture read
- float borderColor[4]; // NOTE(review): presumably the colour returned under hipAddressModeBorder; confirm
- int normalizedCoords; // NOTE(review): looks like the counterpart of textureReference::normalized; confirm
- unsigned int maxAnisotropy; // same name and type as textureReference::maxAnisotropy
- enum hipTextureFilterMode mipmapFilterMode;
- float mipmapLevelBias;
- float minMipmapLevelClamp;
- float maxMipmapLevelClamp;
- }hipTextureDesc;
- #if __cplusplus
- /*******************************************************************************
- * *
- * *
- * *
- *******************************************************************************/
- #if __HIP__ // true only when __HIP__ is defined to a non-zero value
- #define __HIP_TEXTURE_ATTRIB __attribute__((device_builtin_texture_type)) // attribute applied to the texture<> struct
- #else
- #define __HIP_TEXTURE_ATTRIB // expands to nothing, so texture<> is declared without the attribute
- #endif
- typedef textureReference* hipTexRef; // pointer alias for textureReference, declared only in the C++ branch
- /**
- * Typed texture reference.
- *
- * T       - texel type; the first constructor derives the channel layout from it.
- * texType - one of the hipTextureType* constants (defaults to hipTextureType1D).
- * mode    - read mode stored in textureReference::readMode.
- *
- * numChannels and format are not written by either constructor.
- * NOTE(review): they appear to be filled in elsewhere - confirm before relying on them.
- */
- template <class T, int texType = hipTextureType1D,
- enum hipTextureReadMode mode = hipReadModeElementType>
- struct __HIP_TEXTURE_ATTRIB texture : public textureReference {
- // Derives the channel layout from T, then shares the initialisation of the
- // four-argument constructor so the two cannot drift apart.
- texture(int norm = 0, enum hipTextureFilterMode fMode = hipFilterModePoint,
- enum hipTextureAddressMode aMode = hipAddressModeClamp)
- : texture(norm, fMode, aMode, hipCreateChannelDesc<T>()) {}
- // Uses the caller-supplied channel layout `desc`; the same address mode is
- // applied to all three dimensions.
- texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode,
- struct hipChannelFormatDesc desc) {
- normalized = norm;
- readMode = mode;
- filterMode = fMode;
- addressMode[0] = aMode;
- addressMode[1] = aMode;
- addressMode[2] = aMode;
- channelDesc = desc;
- sRGB = 0;
- textureObject = nullptr;
- maxAnisotropy = 0;
- // Previously left indeterminate for objects without static storage; point
- // filtering (value 0) matches what zero-initialised instances already had.
- mipmapFilterMode = hipFilterModePoint;
- mipmapLevelBias = 0;
- minMipmapLevelClamp = 0;
- maxMipmapLevelClamp = 0;
- }
- };
- #endif /* __cplusplus */
- #else
- #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__");
- #endif
- #if defined(__clang__)
- #pragma clang diagnostic pop
- #endif
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_vector_types.h>
- #endif
- extern "C" {
- #define ADDRESS_SPACE_CONSTANT __attribute__((address_space(4))) // qualifier for the image/sampler pointer parameters below; NOTE(review): 4 is presumably the target's constant address space - confirm
- __device__ float4::Native_vec_ __ockl_image_load_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); // load family: image words `i` plus integer coordinate(s) `c`, no sampler; returns a native 4-float vector
- __device__ float4::Native_vec_ __ockl_image_load_1Db(unsigned int ADDRESS_SPACE_CONSTANT*i, int c); // called by tex1Dfetch
- __device__ float4::Native_vec_ __ockl_image_load_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_load_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_load_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_load_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_load_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f); // CM variants take an extra int `f`
- __device__ float4::Native_vec_ __ockl_image_load_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f);
- __device__ float4::Native_vec_ __ockl_image_load_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l); // lod variants take an extra int `l`
- __device__ float4::Native_vec_ __ockl_image_load_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
- __device__ float4::Native_vec_ __ockl_image_load_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l);
- __device__ float4::Native_vec_ __ockl_image_load_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
- __device__ float4::Native_vec_ __ockl_image_load_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l);
- __device__ float4::Native_vec_ __ockl_image_load_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l);
- __device__ float4::Native_vec_ __ockl_image_load_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l);
- __device__ void __ockl_image_store_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, float4::Native_vec_ p); // store family: same addressing arguments as the loads plus a native 4-float vector `p`; returns nothing
- __device__ void __ockl_image_store_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
- __device__ void __ockl_image_store_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, float4::Native_vec_ p);
- __device__ void __ockl_image_store_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
- __device__ void __ockl_image_store_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, float4::Native_vec_ p);
- __device__ void __ockl_image_store_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, float4::Native_vec_ p);
- __device__ void __ockl_image_store_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, int c, int l, float4::Native_vec_ p); // lod variants take an extra int `l` before `p`
- __device__ void __ockl_image_store_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int l, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int l, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, int2::Native_vec_ c, int f, int l, float4::Native_vec_ p);
- __device__ void __ockl_image_store_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, int4::Native_vec_ c, int f, int l, float4::Native_vec_ p);
- __device__ float4::Native_vec_ __ockl_image_sample_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c); // sample family: image words `i`, sampler words `s`, float coordinate(s) `c`; returns a native 4-float vector
- __device__ float4::Native_vec_ __ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float dx, float dy); // grad variants take extra `dx`/`dy` arguments; no CM or CMa grad entry point is declared
- __device__ float4::Native_vec_ __ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float dx, float dy);
- __device__ float4::Native_vec_ __ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
- __device__ float4::Native_vec_ __ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float2::Native_vec_ dx, float2::Native_vec_ dy);
- __device__ float4::Native_vec_ __ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float4::Native_vec_ dx, float4::Native_vec_ dy);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float c, float l); // lod variants take an extra float `l`
- __device__ float4::Native_vec_ __ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_CM(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_sample_lod_CMa(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float4::Native_vec_ c, float l);
- __device__ float4::Native_vec_ __ockl_image_gather4r_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c); // gather4 family: declared for 2D only, one entry point per r/g/b/a suffix
- __device__ float4::Native_vec_ __ockl_image_gather4g_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_gather4b_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
- __device__ float4::Native_vec_ __ockl_image_gather4a_2D(unsigned int ADDRESS_SPACE_CONSTANT*i, unsigned int ADDRESS_SPACE_CONSTANT*s, float2::Native_vec_ c);
- __device__ int __ockl_image_channel_data_type_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); // channel_data_type family: takes only the image words `i` and returns an int code; one entry point per image kind
- __device__ int __ockl_image_channel_data_type_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_2D(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_3D(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_CM(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_data_type_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_1D(unsigned int ADDRESS_SPACE_CONSTANT* i); // channel_order family: same shape as above, returning an int code
- __device__ int __ockl_image_channel_order_1Da(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_1Db(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_2D(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_2Da(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_2Dad(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_2Dd(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_3D(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT* i);
- __device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i);
- }
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if defined(__cplusplus)
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_vector_types.h>
- #include <hip/hip_texture_types.h>
- #include <hip/amd_detail/ockl_image.h>
- #include <type_traits>
- #endif // !defined(__HIPCC_RTC__)
- #define TEXTURE_PARAMETERS_INIT /* declares locals `i` and `s`; expects a texture named `t` in the enclosing scope */ \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)t.textureObject; /* image words: the texture object handle reinterpreted as a dword pointer */ \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; /* sampler words: fixed dword offset past the image words */
- // Trait: `value` is true when T is exactly one of char, unsigned char, short,
- // unsigned short, int, unsigned int or float.
- template<typename T>
- struct __hip_is_tex_surf_scalar_channel_type
- {
- private:
- // Accepted scalar types, grouped by family.
- static constexpr bool is_char_ =
- std::is_same<T, char>::value || std::is_same<T, unsigned char>::value;
- static constexpr bool is_short_ =
- std::is_same<T, short>::value || std::is_same<T, unsigned short>::value;
- static constexpr bool is_int_ =
- std::is_same<T, int>::value || std::is_same<T, unsigned int>::value;
- static constexpr bool is_float_ = std::is_same<T, float>::value;
- public:
- static constexpr bool value = is_char_ || is_short_ || is_int_ || is_float_;
- };
- // Primary template: a non-vector T is a valid channel type exactly when it is
- // an accepted scalar channel type.
- template<typename T>
- struct __hip_is_tex_surf_channel_type
- {
- static constexpr bool value = __hip_is_tex_surf_scalar_channel_type<T>::value;
- };
- // Vector specialisation: the element type must be an accepted scalar channel
- // type and the vector must have 1, 2 or 4 components.
- template<typename T, unsigned int rank>
- struct __hip_is_tex_surf_channel_type<HIP_vector_type<T, rank>>
- {
- private:
- static constexpr bool rank_ok_ = (rank == 1) || (rank == 2) || (rank == 4);
- public:
- static constexpr bool value =
- __hip_is_tex_surf_scalar_channel_type<T>::value && rank_ok_;
- };
- // Trait: `value` is true when T is exactly one of char, unsigned char, short or
- // unsigned short, the scalar types accepted for hipReadModeNormalizedFloat.
- template<typename T>
- struct __hip_is_tex_normalized_channel_type
- {
- private:
- static constexpr bool is_char_ =
- std::is_same<T, char>::value || std::is_same<T, unsigned char>::value;
- static constexpr bool is_short_ =
- std::is_same<T, short>::value || std::is_same<T, unsigned short>::value;
- public:
- static constexpr bool value = is_char_ || is_short_;
- };
- // Vector specialisation: the element type must be accepted for normalized
- // reads and the vector must have 1, 2 or 4 components.
- template<typename T, unsigned int rank>
- struct __hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>
- {
- private:
- static constexpr bool rank_ok_ = (rank == 1) || (rank == 2) || (rank == 4);
- public:
- static constexpr bool value =
- __hip_is_tex_normalized_channel_type<T>::value && rank_ok_;
- };
- template <
- typename T,
- hipTextureReadMode readMode,
- typename Enable = void> // __hip_tex_ret_t always passes bool here, which the specialisations match through enable_if<..., bool>
- struct __hip_tex_ret // primary template: reached only when no specialisation accepts (T, readMode)
- {
- static_assert(std::is_same<Enable, void>::value, "Invalid channel type!"); // fails whenever Enable is not void, i.e. for every lookup made through __hip_tex_ret_t
- };
- /*
- * Convert the value U returned by a device image function into the scalar
- * texture type T.
- */
- template<typename T, typename U>
- __forceinline__ __device__
- typename std::enable_if<
- __hip_is_tex_surf_scalar_channel_type<T>::value, const T>::type
- __hipMapFrom(const U &u) {
- if constexpr (sizeof(T) >= sizeof(float)) {
- // T is as wide as float: reinterpret the leading bytes of the raw value as T.
- union {
- U raw;
- T out;
- } pun = { u };
- return pun.out;
- } else {
- // T is narrower than float: read the leading bytes as int, then narrow to T.
- union {
- U raw;
- int word;
- } pun = { u };
- return static_cast<T>(pun.word);
- }
- }
- /*
- * Convert the value U returned by a device image function into the vector
- * texture type T.
- */
- template<typename T, typename U>
- __forceinline__ __device__
- typename std::enable_if<
- __hip_is_tex_surf_scalar_channel_type<typename T::value_type>::value, const T>::type
- __hipMapFrom(const U &u) {
- using Elem = typename T::value_type;
- if constexpr (sizeof(Elem) >= sizeof(float)) {
- // Elements are as wide as float: reinterpret the leading bytes of the raw value as T.
- union {
- U raw;
- T out;
- } pun = { u };
- return pun.out;
- } else {
- // Narrow elements: view the raw value as int4 and convert it to T's element type and width.
- union {
- U raw;
- int4 lanes;
- } pun = { u };
- return __hipMapVector<Elem, sizeof(T)/sizeof(Elem)>(pun.lanes);
- }
- }
- /*
- * Convert a scalar texture value of type T into the input type U expected by a
- * device image function.
- */
- template<typename U, typename T>
- __forceinline__ __device__
- typename std::enable_if<
- __hip_is_tex_surf_scalar_channel_type<T>::value, const U>::type
- __hipMapTo(const T &t) {
- if constexpr (sizeof(T) >= sizeof(float)) {
- // T is as wide as float: copy its bytes over the start of the zero-initialised U.
- union {
- U packed;
- T in;
- } pun = { 0 };
- pun.in = t;
- return pun.packed;
- } else {
- // T is narrower than float: widen to int first, then store into the start of U.
- union {
- U packed;
- int word;
- } pun = { 0 };
- pun.word = static_cast<int>(t);
- return pun.packed;
- }
- }
- /*
- * Convert a vector texture value of type T into the input type U expected by a
- * device image function.
- */
- template<typename U, typename T>
- __forceinline__ __device__
- typename std::enable_if<
- __hip_is_tex_surf_scalar_channel_type<typename T::value_type>::value, const U>::type
- __hipMapTo(const T &t) {
- using Elem = typename T::value_type;
- if constexpr (sizeof(Elem) >= sizeof(float)) {
- // Elements are as wide as float: copy T's bytes over the start of the zero-initialised U.
- union {
- U packed;
- T in;
- } pun = { 0 };
- pun.in = t;
- return pun.packed;
- } else {
- // Narrow elements: convert to an int4 first, then hand its bytes over as U.
- union {
- U packed;
- int4 lanes;
- } pun = { 0 };
- pun.lanes = __hipMapVector<int, 4>(t);
- return pun.packed;
- }
- }
- template <
- typename T, // texel type of the texture
- hipTextureReadMode readMode> // read mode of the texture
- using __hip_tex_ret_t = typename __hip_tex_ret<T, readMode, bool>::type; // return type of the fetch functions; passing bool as Enable selects the enable_if<..., bool> specialisations, otherwise the primary template's static_assert fires
- template <typename T>
- struct __hip_tex_ret<
- T,
- hipReadModeElementType, // element-type reads
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value, bool>::type> // participates only for accepted channel types; yields bool to match __hip_tex_ret_t
- {
- using type = T; // the fetch returns the texel type unchanged
- };
- template<
- typename T,
- unsigned int rank>
- struct __hip_tex_ret<
- HIP_vector_type<T, rank>, // vector texel types
- hipReadModeElementType, // element-type reads
- typename std::enable_if<__hip_is_tex_surf_channel_type<HIP_vector_type<T, rank>>::value, bool>::type> // accepted element type and rank 1, 2 or 4
- {
- using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeElementType>, rank>; // same rank, element type resolved through the scalar case
- };
- template<typename T>
- struct __hip_tex_ret<
- T,
- hipReadModeNormalizedFloat, // normalized-float reads
- typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type> // participates only for char/short based scalar types
- {
- using type = float; // the fetch returns float regardless of the texel type
- };
- template<
- typename T,
- unsigned int rank>
- struct __hip_tex_ret<
- HIP_vector_type<T, rank>, // vector texel types
- hipReadModeNormalizedFloat, // normalized-float reads
- typename std::enable_if<__hip_is_tex_normalized_channel_type<HIP_vector_type<T, rank>>::value, bool>::type> // char/short based element type and rank 1, 2 or 4
- {
- using type = HIP_vector_type<__hip_tex_ret_t<T, hipReadModeNormalizedFloat>, rank>; // same rank, element type resolved through the scalar case (float)
- };
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1Dfetch(texture<T, hipTextureType1D, readMode> t, int x)
- {
- // Passes the integer index x to the sampler-less 1Db load and maps the
- // result to the texture's return type.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_load_1Db(i, x);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1D(texture<T, hipTextureType1D, readMode> t, float x)
- {
- // Samples the 1D texture at x using its image and sampler words, then maps
- // the result to the texture's return type.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_1D(i, s, x);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2D(texture<T, hipTextureType2D, readMode> t, float x, float y)
- {
- // Samples the 2D texture at (x, y) and maps the result to the texture's
- // return type.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_2D(i, s, float2(x, y).data);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayered(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer)
- {
- // The layer index travels as the second float coordinate of the 1Da sample.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_1Da(i, s, float2(x, layer).data);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayered(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer)
- {
- // The layer index travels as the third float coordinate; the fourth is 0.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_2Da(i, s, float4(x, y, layer, 0.0f).data);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3D(texture<T, hipTextureType3D, readMode> t, float x, float y, float z)
- {
- // Samples the 3D texture at (x, y, z); the fourth coordinate is 0.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_3D(i, s, float4(x, y, z, 0.0f).data);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemap(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z)
- {
- // Samples the cubemap with (x, y, z); the fourth coordinate is 0.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_CM(i, s, float4(x, y, z, 0.0f).data);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLod(texture<T, hipTextureType1D, readMode> t, float x, float level)
- {
- // Samples the 1D texture at x, forwarding `level` to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_1D(i, s, x, level);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLod(texture<T, hipTextureType2D, readMode> t, float x, float y, float level)
- {
- // Samples the 2D texture at (x, y), forwarding `level` to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_2D(i, s, float2(x, y).data, level);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredLod(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float level)
- {
- // The layer index travels as the second float coordinate; `level` is
- // forwarded to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredLod(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float level)
- {
- // The layer index travels as the third float coordinate; `level` is
- // forwarded to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DLod(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float level)
- {
- // Samples the 3D texture at (x, y, z), forwarding `level` to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_3D(i, s, float4(x, y, z, 0.0f).data, level);
- return __hipMapFrom<result_t>(texel);
- }
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLod(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float level)
- {
- // Samples the cubemap with (x, y, z), forwarding `level` to the lod entry point.
- using result_t = __hip_tex_ret_t<T, readMode>;
- TEXTURE_PARAMETERS_INIT;
- const auto texel = __ockl_image_sample_lod_CM(i, s, float4(x, y, z, 0.0f).data, level);
- return __hipMapFrom<result_t>(texel);
- }
- // Texture-reference overload: packs (x, y, z, layer) into a float4 (the
- // integer layer is converted to float), forwards it to
- // __ockl_image_sample_CMa, and converts the result with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayered(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float4 coords(x, y, z, layer);
- return __hipMapFrom<ret_t>(__ockl_image_sample_CMa(i, s, coords.data));
- }
- // Texture-reference overload: packs (x, y, z, layer) into a float4, forwards
- // it with `level` to __ockl_image_sample_lod_CMa, and converts the result
- // with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayeredLod(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float level)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float4 coords(x, y, z, layer);
- return __hipMapFrom<ret_t>(__ockl_image_sample_lod_CMa(i, s, coords.data, level));
- }
- // Texture-reference overload of texCubemapGrad. Currently a stub: every
- // coordinate and gradient argument is ignored and a value-initialized result
- // (`{}`) is returned. The commented-out lines show the intended call, which
- // is disabled because the intrinsic is reported missing from the device libs.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapGrad(texture<T, hipTextureTypeCubemap, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
- return {};
- }
- // Texture-reference overload of texCubemapLayeredGrad. Currently a stub:
- // every coordinate, layer and gradient argument is ignored and a
- // value-initialized result (`{}`) is returned. The commented-out lines show
- // the intended call, disabled because the intrinsic is reported missing from
- // the device libs.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> texCubemapLayeredGrad(texture<T, hipTextureTypeCubemapLayered, readMode> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return __hipMapFrom<__hip_tex_ret_t<T, readMode>>(tmp);
- return {};
- }
- // Texture-reference overload: forwards `x` and the scalar gradients
- // `dPdx`/`dPdy` to __ockl_image_sample_grad_1D and converts the result with
- // __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DGrad(texture<T, hipTextureType1D, readMode> t, float x, float dPdx, float dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- return __hipMapFrom<ret_t>(__ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy));
- }
- // Texture-reference overload: forwards (x, y) and the two float2 gradients
- // to __ockl_image_sample_grad_2D and converts the result with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DGrad(texture<T, hipTextureType2D, readMode> t, float x, float y, float2 dPdx, float2 dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float2 coords(x, y);
- const float2 gradX(dPdx.x, dPdx.y);
- const float2 gradY(dPdy.x, dPdy.y);
- return __hipMapFrom<ret_t>(__ockl_image_sample_grad_2D(i, s, coords.data, gradX.data, gradY.data));
- }
- // Texture-reference overload: packs (x, layer) into a float2 and forwards it
- // with the scalar gradients to __ockl_image_sample_grad_1Da; the result is
- // converted with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex1DLayeredGrad(texture<T, hipTextureType1DLayered, readMode> t, float x, int layer, float dPdx, float dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float2 coords(x, layer);
- return __hipMapFrom<ret_t>(__ockl_image_sample_grad_1Da(i, s, coords.data, dPdx, dPdy));
- }
- // Texture-reference overload: packs (x, y, layer, 0) into a float4 and
- // forwards it with the two float2 gradients to __ockl_image_sample_grad_2Da;
- // the result is converted with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex2DLayeredGrad(texture<T, hipTextureType2DLayered, readMode> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float4 coords(x, y, layer, 0.0f);
- const float2 gradX(dPdx.x, dPdx.y);
- const float2 gradY(dPdy.x, dPdy.y);
- return __hipMapFrom<ret_t>(__ockl_image_sample_grad_2Da(i, s, coords.data, gradX.data, gradY.data));
- }
- // Texture-reference overload: forwards (x, y, z, 0) and the xyz parts of
- // both gradients (w forced to 0.0f) to __ockl_image_sample_grad_3D; the
- // result is converted with __hipMapFrom.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex_ret_t<T, readMode> tex3DGrad(texture<T, hipTextureType3D, readMode> t, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- TEXTURE_PARAMETERS_INIT;
- using ret_t = __hip_tex_ret_t<T, readMode>;
- const float4 coords(x, y, z, 0.0f);
- const float4 gradX(dPdx.x, dPdx.y, dPdx.z, 0.0f);
- const float4 gradY(dPdy.x, dPdy.y, dPdy.z, 0.0f);
- return __hipMapFrom<ret_t>(__ockl_image_sample_grad_3D(i, s, coords.data, gradX.data, gradY.data));
- }
- // Primary template of the tex2Dgather return-type trait. It has no `type`
- // member; it is selected only when no partial specialization matches. The
- // __hip_tex2dgather_ret_t alias instantiates the trait with Enable = bool,
- // so reaching this primary template makes the static_assert below fail with
- // "Invalid channel type!".
- template <
- typename T,
- hipTextureReadMode readMode,
- typename Enable = void>
- struct __hip_tex2dgather_ret
- {
- static_assert(std::is_same<Enable, void>::value, "Invalid channel type!");
- };
- // Convenience alias for the tex2Dgather return type. Passing `bool` as the
- // third argument is what lets the enable_if<..., bool> partial
- // specializations of __hip_tex2dgather_ret match.
- template <
- typename T,
- hipTextureReadMode readMode>
- using __hip_tex2dgather_ret_t = typename __hip_tex2dgather_ret<T, readMode, bool>::type;
- // hipReadModeElementType, T accepted by __hip_is_tex_surf_channel_type:
- // the gather result is a 4-component HIP vector of T.
- template <typename T>
- struct __hip_tex2dgather_ret<
- T,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value, bool>::type>
- {
- using type = HIP_vector_type<T, 4>;
- };
- // hipReadModeElementType, vector channel type HIP_vector_type<T, rank>:
- // the gather result is a 4-component vector of the element type T,
- // regardless of the input `rank`.
- template<
- typename T,
- unsigned int rank>
- struct __hip_tex2dgather_ret<
- HIP_vector_type<T, rank>,
- hipReadModeElementType,
- typename std::enable_if<__hip_is_tex_surf_channel_type<HIP_vector_type<T, rank>>::value, bool>::type>
- {
- using type = HIP_vector_type<T, 4>;
- };
- // hipReadModeNormalizedFloat, T accepted by
- // __hip_is_tex_normalized_channel_type: the gather result is always float4.
- template <typename T>
- struct __hip_tex2dgather_ret<
- T,
- hipReadModeNormalizedFloat,
- typename std::enable_if<__hip_is_tex_normalized_channel_type<T>::value, bool>::type>
- {
- using type = float4;
- };
- // Texture-reference overload of tex2Dgather. `comp` selects which
- // __ockl_image_gather4*_2D intrinsic is called at (x, y):
- //   1 -> gather4g, 2 -> gather4b, 3 -> gather4a,
- //   any other value (including the default 0) -> gather4r.
- // The raw result is converted to __hip_tex2dgather_ret_t<T, readMode>.
- template <typename T, hipTextureReadMode readMode>
- static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t<T, readMode> tex2Dgather(texture<T, hipTextureType2D, readMode> t, float x, float y, int comp=0)
- {
- TEXTURE_PARAMETERS_INIT;
- using gather_t = __hip_tex2dgather_ret_t<T, readMode>;
- const float2 coords(x, y);
- switch (comp) {
- case 1:
- return __hipMapFrom<gather_t>(__ockl_image_gather4g_2D(i, s, coords.data));
- case 2:
- return __hipMapFrom<gather_t>(__ockl_image_gather4b_2D(i, s, coords.data));
- case 3:
- return __hipMapFrom<gather_t>(__ockl_image_gather4a_2D(i, s, coords.data));
- default:
- break;
- }
- // Fallback for comp == 0 and any out-of-range selector.
- return __hipMapFrom<gather_t>(__ockl_image_gather4r_2D(i, s, coords.data));
- }
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if defined(__cplusplus)
- #if !defined(__HIPCC_RTC__)
- #include <hip/hip_vector_types.h>
- #include <hip/hip_texture_types.h>
- #include <hip/amd_detail/texture_fetch_functions.h>
- #include <hip/amd_detail/ockl_image.h>
- #include <type_traits>
- #endif // !defined(__HIPCC_RTC__)
- // Declares the two locals used by every texture-object fetch function below.
- // Expects a variable named `textureObject` in the enclosing scope.
- //   i: `textureObject` reinterpreted as a pointer to constant-address-space dwords
- //   s: `i` advanced by HIP_SAMPLER_OBJECT_OFFSET_DWORD dwords
- // The expansion already ends in ';', so call sites use it without one.
- #define TEXTURE_OBJECT_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)textureObject; \
- unsigned int ADDRESS_SPACE_CONSTANT* s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD;
- // Texture-object overload: loads element `x` through __ockl_image_load_1Db
- // (no sampler argument is passed) and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1Dfetch(hipTextureObject_t textureObject, int x)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- return __hipMapFrom<T>(__ockl_image_load_1Db(i, x));
- }
- // Pointer-output overload: writes tex1Dfetch<T>(textureObject, x) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1Dfetch(T *ptr, hipTextureObject_t textureObject, int x)
- {
- T texel = tex1Dfetch<T>(textureObject, x);
- *ptr = texel;
- }
- // Texture-object overload: forwards `x` to __ockl_image_sample_1D and
- // converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1D(hipTextureObject_t textureObject, float x)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- return __hipMapFrom<T>(__ockl_image_sample_1D(i, s, x));
- }
- // Pointer-output overload: writes tex1D<T>(textureObject, x) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1D(T *ptr, hipTextureObject_t textureObject, float x)
- {
- T texel = tex1D<T>(textureObject, x);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y) into a float2, forwards it to
- // __ockl_image_sample_2D, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2D(hipTextureObject_t textureObject, float x, float y)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, y);
- return __hipMapFrom<T>(__ockl_image_sample_2D(i, s, coords.data));
- }
- // Pointer-output overload: writes tex2D<T>(textureObject, x, y) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2D(T *ptr, hipTextureObject_t textureObject, float x, float y)
- {
- T texel = tex2D<T>(textureObject, x, y);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, z, 0) into a float4, forwards it to
- // __ockl_image_sample_3D, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_3D(i, s, coords.data));
- }
- // Pointer-output overload: writes tex3D<T>(textureObject, x, y, z) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex3D(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
- {
- T texel = tex3D<T>(textureObject, x, y, z);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, layer) into a float2 (the integer layer
- // is converted to float), forwards it to __ockl_image_sample_1Da, and
- // converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, layer);
- return __hipMapFrom<T>(__ockl_image_sample_1Da(i, s, coords.data));
- }
- // Pointer-output overload: writes tex1DLayered<T>(textureObject, x, layer)
- // to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1DLayered(T *ptr, hipTextureObject_t textureObject, float x, int layer)
- {
- T texel = tex1DLayered<T>(textureObject, x, layer);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, layer, 0) into a float4, forwards it
- // to __ockl_image_sample_2Da, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, layer, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_2Da(i, s, coords.data));
- }
- // Pointer-output overload of tex2DLayered: writes the value returned by
- // tex2DLayered<T>(textureObject, x, y, layer) to *ptr.
- //   ptr           destination for the fetched value
- //   textureObject texture object to sample
- //   x, y          coordinates forwarded to the value-returning overload
- //   layer         layer index forwarded to the value-returning overload
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2DLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer)
- {
- // Must forward to the 2D layered fetch; the 1D layered fetch takes no `y`.
- *ptr = tex2DLayered<T>(textureObject, x, y, layer);
- }
- // Texture-object overload: packs (x, y, z, 0) into a float4, forwards it to
- // __ockl_image_sample_CM, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemap(hipTextureObject_t textureObject, float x, float y, float z)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_CM(i, s, coords.data));
- }
- // Pointer-output overload: writes texCubemap<T>(textureObject, x, y, z)
- // to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemap(T *ptr, hipTextureObject_t textureObject, float x, float y, float z)
- {
- T texel = texCubemap<T>(textureObject, x, y, z);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, z, layer) into a float4 (the integer
- // layer is converted to float), forwards it to __ockl_image_sample_CMa, and
- // converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemapLayered(hipTextureObject_t textureObject, float x, float y, float z, int layer)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, layer);
- return __hipMapFrom<T>(__ockl_image_sample_CMa(i, s, coords.data));
- }
- // Pointer-output overload: writes
- // texCubemapLayered<T>(textureObject, x, y, z, layer) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemapLayered(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer)
- {
- T texel = texCubemapLayered<T>(textureObject, x, y, z, layer);
- *ptr = texel;
- }
- // Texture-object overload of tex2Dgather. `comp` selects which
- // __ockl_image_gather4*_2D intrinsic is called at (x, y):
- //   1 -> gather4g, 2 -> gather4b, 3 -> gather4a,
- //   any other value (including the default 0) -> gather4r.
- // This is the same mapping used by the texture-reference tex2Dgather
- // overload, so both APIs return the same component for the same `comp`.
- // The raw result is converted to T with __hipMapFrom.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2Dgather(hipTextureObject_t textureObject, float x, float y, int comp = 0)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, y);
- switch (comp) {
- case 1:
- return __hipMapFrom<T>(__ockl_image_gather4g_2D(i, s, coords.data));
- case 2:
- return __hipMapFrom<T>(__ockl_image_gather4b_2D(i, s, coords.data));
- case 3:
- return __hipMapFrom<T>(__ockl_image_gather4a_2D(i, s, coords.data));
- default:
- // comp == 0 and any out-of-range selector gather the first (r) component.
- return __hipMapFrom<T>(__ockl_image_gather4r_2D(i, s, coords.data));
- }
- }
- // Pointer-output overload of tex2Dgather: writes the value returned by
- // tex2Dgather<T>(textureObject, x, y, comp) to *ptr.
- //   ptr           destination for the gathered value
- //   textureObject texture object to gather from
- //   x, y          coordinates forwarded to the value-returning overload
- //   comp          component selector forwarded unchanged (default 0)
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2Dgather(T *ptr, hipTextureObject_t textureObject, float x, float y, int comp = 0)
- {
- // Must forward to tex2Dgather; texCubemapLayered has a different arity
- // and samples a different kind of texture.
- *ptr = tex2Dgather<T>(textureObject, x, y, comp);
- }
- // Texture-object overload: forwards `x` and `level` to
- // __ockl_image_sample_lod_1D and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1DLod(hipTextureObject_t textureObject, float x, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- return __hipMapFrom<T>(__ockl_image_sample_lod_1D(i, s, x, level));
- }
- // Pointer-output overload: writes tex1DLod<T>(textureObject, x, level)
- // to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1DLod(T *ptr, hipTextureObject_t textureObject, float x, float level)
- {
- T texel = tex1DLod<T>(textureObject, x, level);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y) into a float2, forwards it with
- // `level` to __ockl_image_sample_lod_2D, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, y);
- return __hipMapFrom<T>(__ockl_image_sample_lod_2D(i, s, coords.data, level));
- }
- // Pointer-output overload: writes tex2DLod<T>(textureObject, x, y, level)
- // to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float level)
- {
- T texel = tex2DLod<T>(textureObject, x, y, level);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, z, 0) into a float4, forwards it
- // with `level` to __ockl_image_sample_lod_3D, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_lod_3D(i, s, coords.data, level));
- }
- // Pointer-output overload: writes
- // tex3DLod<T>(textureObject, x, y, z, level) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex3DLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
- {
- T texel = tex3DLod<T>(textureObject, x, y, z, level);
- *ptr = texel;
- }
- // Texture-object overload of tex1DLayeredLod: packs (x, layer) into a float2
- // (the integer layer is converted to float) and forwards it together with
- // `level` to __ockl_image_sample_lod_1Da, the same intrinsic the
- // texture-reference overload uses. The result is converted to T.
- //   textureObject texture object to sample
- //   x             coordinate
- //   layer         layer index
- //   level         level-of-detail argument passed to the lod intrinsic
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- // Use the *_lod_* variant so that `level` is honoured rather than dropped.
- auto tmp = __ockl_image_sample_lod_1Da(i, s, float2(x, layer).data, level);
- return __hipMapFrom<T>(tmp);
- }
- // Pointer-output overload: writes
- // tex1DLayeredLod<T>(textureObject, x, layer, level) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, int layer, float level)
- {
- T texel = tex1DLayeredLod<T>(textureObject, x, layer, level);
- *ptr = texel;
- }
- // Texture-object overload of tex2DLayeredLod: packs (x, y, layer, 0) into a
- // float4 and forwards it together with `level` to
- // __ockl_image_sample_lod_2Da, the same intrinsic the texture-reference
- // overload uses. The result is converted to T.
- //   textureObject texture object to sample
- //   x, y          coordinates
- //   layer         layer index
- //   level         level-of-detail argument passed to the lod intrinsic
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- // Use the *_lod_* variant so that `level` is honoured rather than dropped.
- auto tmp = __ockl_image_sample_lod_2Da(i, s, float4(x, y, layer, 0.0f).data, level);
- return __hipMapFrom<T>(tmp);
- }
- // Pointer-output overload: writes
- // tex2DLayeredLod<T>(textureObject, x, y, layer, level) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2DLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float level)
- {
- T texel = tex2DLayeredLod<T>(textureObject, x, y, layer, level);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, z, 0) into a float4, forwards it
- // with `level` to __ockl_image_sample_lod_CM, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemapLod(hipTextureObject_t textureObject, float x, float y, float z, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_lod_CM(i, s, coords.data, level));
- }
- // Pointer-output overload: writes
- // texCubemapLod<T>(textureObject, x, y, z, level) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemapLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float level)
- {
- T texel = texCubemapLod<T>(textureObject, x, y, z, level);
- *ptr = texel;
- }
- // Texture-object overload of texCubemapGrad. Currently a stub: the
- // coordinates and gradients are ignored and a value-initialized T (`{}`) is
- // returned. The commented-out lines show the intended call, disabled
- // because the intrinsic is reported missing from the device libs.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemapGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CM(i, s, float4(x, y, z, 0.0f).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return __hipMapFrom<T>(tmp);
- return {};
- }
- // Pointer-output overload: writes
- // texCubemapGrad<T>(textureObject, x, y, z, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemapGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- T texel = texCubemapGrad<T>(textureObject, x, y, z, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, z, layer) into a float4, forwards it
- // with `level` to __ockl_image_sample_lod_CMa, and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemapLayeredLod(hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, layer);
- return __hipMapFrom<T>(__ockl_image_sample_lod_CMa(i, s, coords.data, level));
- }
- // Pointer-output overload: writes
- // texCubemapLayeredLod<T>(textureObject, x, y, z, layer, level) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemapLayeredLod(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float level)
- {
- T texel = texCubemapLayeredLod<T>(textureObject, x, y, z, layer, level);
- *ptr = texel;
- }
- // Texture-object overload: forwards `x` and the scalar gradients to
- // __ockl_image_sample_grad_1D and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1DGrad(hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- return __hipMapFrom<T>(__ockl_image_sample_grad_1D(i, s, x, dPdx, dPdy));
- }
- // Pointer-output overload: writes
- // tex1DGrad<T>(textureObject, x, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1DGrad(T *ptr, hipTextureObject_t textureObject, float x, float dPdx, float dPdy)
- {
- T texel = tex1DGrad<T>(textureObject, x, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload: forwards (x, y) and the two float2 gradients to
- // __ockl_image_sample_grad_2D and converts the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2DGrad(hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, y);
- const float2 gradX(dPdx.x, dPdx.y);
- const float2 gradY(dPdy.x, dPdy.y);
- return __hipMapFrom<T>(__ockl_image_sample_grad_2D(i, s, coords.data, gradX.data, gradY.data));
- }
- // Pointer-output overload: writes
- // tex2DGrad<T>(textureObject, x, y, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float2 dPdx, float2 dPdy)
- {
- T texel = tex2DGrad<T>(textureObject, x, y, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload: forwards (x, y, z, 0) and the xyz parts of both
- // gradients (w forced to 0.0f) to __ockl_image_sample_grad_3D and converts
- // the result to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex3DGrad(hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, z, 0.0f);
- const float4 gradX(dPdx.x, dPdx.y, dPdx.z, 0.0f);
- const float4 gradY(dPdy.x, dPdy.y, dPdy.z, 0.0f);
- return __hipMapFrom<T>(__ockl_image_sample_grad_3D(i, s, coords.data, gradX.data, gradY.data));
- }
- // Pointer-output overload: writes
- // tex3DGrad<T>(textureObject, x, y, z, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex3DGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, float4 dPdx, float4 dPdy)
- {
- T texel = tex3DGrad<T>(textureObject, x, y, z, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, layer) into a float2 and forwards it
- // with the scalar gradients to __ockl_image_sample_grad_1Da; the result is
- // converted to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float2 coords(x, layer);
- return __hipMapFrom<T>(__ockl_image_sample_grad_1Da(i, s, coords.data, dPdx, dPdy));
- }
- // Pointer-output overload: writes
- // tex1DLayeredGrad<T>(textureObject, x, layer, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex1DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, int layer, float dPdx, float dPdy)
- {
- T texel = tex1DLayeredGrad<T>(textureObject, x, layer, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload: packs (x, y, layer, 0) into a float4 and forwards
- // it with the two float2 gradients to __ockl_image_sample_grad_2Da; the
- // result is converted to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T tex2DLayeredGrad(hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- const float4 coords(x, y, layer, 0.0f);
- const float2 gradX(dPdx.x, dPdx.y);
- const float2 gradY(dPdy.x, dPdy.y);
- return __hipMapFrom<T>(__ockl_image_sample_grad_2Da(i, s, coords.data, gradX.data, gradY.data));
- }
- // Pointer-output overload: writes
- // tex2DLayeredGrad<T>(textureObject, x, y, layer, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void tex2DLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
- {
- T texel = tex2DLayeredGrad<T>(textureObject, x, y, layer, dPdx, dPdy);
- *ptr = texel;
- }
- // Texture-object overload of texCubemapLayeredGrad. Currently a stub: the
- // coordinates, layer and gradients are ignored and a value-initialized T
- // (`{}`) is returned. The commented-out lines show the intended call,
- // disabled because the intrinsic is reported missing from the device libs.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ T texCubemapLayeredGrad(hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
- {
- TEXTURE_OBJECT_PARAMETERS_INIT
- // TODO missing in device libs.
- // auto tmp = __ockl_image_sample_grad_CMa(i, s, float4(x, y, z, layer).data, float4(dPdx.x, dPdx.y, dPdx.z, 0.0f).data, float4(dPdy.x, dPdy.y, dPdy.z, 0.0f).data);
- // return __hipMapFrom<T>(tmp);
- return {};
- }
- // Pointer-output overload: writes
- // texCubemapLayeredGrad<T>(textureObject, x, y, z, layer, dPdx, dPdy) to *ptr.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void texCubemapLayeredGrad(T *ptr, hipTextureObject_t textureObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
- {
- T texel = texCubemapLayeredGrad<T>(textureObject, x, y, z, layer, dPdx, dPdy);
- *ptr = texel;
- }
- #endif
- /*
- Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H
- #if defined(__cplusplus)
- #if !defined(__HIPCC_RTC__)
- #include <hip/surface_types.h>
- #include <hip/hip_vector_types.h>
- #include <hip/amd_detail/texture_fetch_functions.h>
- #include <hip/amd_detail/ockl_image.h>
- #endif
- #if defined(__HIPCC_RTC__)
- #define __HOST_DEVICE__ __device__
- #else
- #define __HOST_DEVICE__ __host__ __device__
- #endif
- // Declares the local `i` used by every surface function below: `surfObj`
- // (which must be in scope at the expansion site) reinterpreted as a pointer
- // to constant-address-space dwords. The expansion already ends in ';'.
- #define __HIP_SURFACE_OBJECT_PARAMETERS_INIT \
- unsigned int ADDRESS_SPACE_CONSTANT* i = (unsigned int ADDRESS_SPACE_CONSTANT*)surfObj;
- // CUDA surface functions address x in bytes, whereas HIP needs a pixel
- // address. This helper converts a byte offset `x` into a pixel index in two
- // steps: first scale down by the entry of FormatLUT selected by `format`,
- // then by the entry of OrderLUT selected by `order`. A LUT entry of 3 means
- // "divide by 3"; any other entry n means "shift right by n bits".
- // The entries appear to be log2(bytes per channel) and log2(channel count)
- // respectively, with 3 reserved for the non-power-of-two cases (review note).
- // Neither index is range-checked; callers pass values obtained from the
- // __ockl_image_channel_* queries.
- static __HOST_DEVICE__ __forceinline__ int __hipGetPixelAddr(int x, int format, int order) {
- /*
- * FormatLUT is indexed by the channel type:
- typedef enum {
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
- HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
- } hsa_ext_image_channel_type_t;
- */
- static const int FormatLUT[] = { 0, 1, 0, 1, 3, 1, 1, 1, 0, 1, 2, 0, 1, 2, 1, 2 };
- const int formatScale = FormatLUT[format];
- if (formatScale == 3) {
- x /= formatScale;
- } else {
- x >>= formatScale;
- }
- /*
- * OrderLUT is indexed by the channel order:
- typedef enum {
- HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
- HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
- HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
- HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
- HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
- } hsa_ext_image_channel_order_t;
- */
- static const int OrderLUT[] = { 0, 0, 1, 1, 3, 1, 3, 2, 2, 2, 2, 2, 3, 2, 2, 2, 0, 0, 0, 0 };
- const int orderScale = OrderLUT[order];
- return (orderScale == 3) ? x / orderScale : x >> orderScale;
- }
- // Reads one element from a 1D surface into *data. `x` is converted from a
- // byte offset to a pixel index with __hipGetPixelAddr (using the surface's
- // channel type and order), loaded via __ockl_image_load_1D, and converted to
- // T. `boundaryMode` is accepted but not used by this implementation.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf1Dread(T* data, hipSurfaceObject_t surfObj, int x, int boundaryMode = hipBoundaryModeZero) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_1D(i);
- const auto channelOrder = __ockl_image_channel_order_1D(i);
- const int pixelX = __hipGetPixelAddr(x, channelType, channelOrder);
- *data = __hipMapFrom<T>(__ockl_image_load_1D(i, pixelX));
- }
- // Writes `data` to a 1D surface. `x` is converted from a byte offset to a
- // pixel index with __hipGetPixelAddr, `data` is converted to a native float4
- // with __hipMapTo, and the result is stored via __ockl_image_store_1D.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf1Dwrite(T data, hipSurfaceObject_t surfObj, int x) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_1D(i);
- const auto channelOrder = __ockl_image_channel_order_1D(i);
- const int pixelX = __hipGetPixelAddr(x, channelType, channelOrder);
- __ockl_image_store_1D(i, pixelX, __hipMapTo<float4::Native_vec_>(data));
- }
- // Reads one element from a 2D surface into *data. Only `x` is converted from
- // a byte offset to a pixel index; `y` is passed through unchanged. The
- // element is loaded via __ockl_image_load_2D and converted to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf2Dread(T* data, hipSurfaceObject_t surfObj, int x, int y) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_2D(i);
- const auto channelOrder = __ockl_image_channel_order_2D(i);
- const int2 pixel(__hipGetPixelAddr(x, channelType, channelOrder), y);
- *data = __hipMapFrom<T>(__ockl_image_load_2D(i, pixel.data));
- }
- // Writes `data` to a 2D surface. Only `x` is converted from a byte offset to
- // a pixel index; `y` is passed through unchanged. `data` is converted to a
- // native float4 and stored via __ockl_image_store_2D.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf2Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_2D(i);
- const auto channelOrder = __ockl_image_channel_order_2D(i);
- const int2 pixel(__hipGetPixelAddr(x, channelType, channelOrder), y);
- __ockl_image_store_2D(i, pixel.data, __hipMapTo<float4::Native_vec_>(data));
- }
- // Reads one element from a 3D surface into *data. Only `x` is converted from
- // a byte offset to a pixel index; `y` and `z` are passed through unchanged in
- // an int4 whose w component is 0. The element is loaded via
- // __ockl_image_load_3D and converted to T.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf3Dread(T* data, hipSurfaceObject_t surfObj, int x, int y, int z) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_3D(i);
- const auto channelOrder = __ockl_image_channel_order_3D(i);
- const int4 pixel(__hipGetPixelAddr(x, channelType, channelOrder), y, z, 0);
- *data = __hipMapFrom<T>(__ockl_image_load_3D(i, pixel.data));
- }
- // Writes `data` to a 3D surface. Only `x` is converted from a byte offset to
- // a pixel index; `y` and `z` are passed through unchanged in an int4 whose w
- // component is 0. `data` is converted to a native float4 and stored via
- // __ockl_image_store_3D.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf3Dwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int z) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_3D(i);
- const auto channelOrder = __ockl_image_channel_order_3D(i);
- const int4 pixel(__hipGetPixelAddr(x, channelType, channelOrder), y, z, 0);
- __ockl_image_store_3D(i, pixel.data, __hipMapTo<float4::Native_vec_>(data));
- }
- // Reads one element from a layered 1D surface into *data. `x` is converted
- // from a byte offset to a pixel index (using the 1D channel queries) and the
- // element is loaded via __ockl_image_load_lod_1D with `layer` as its last
- // argument, then converted to T.
- // NOTE(review): `layer` is handed to a *_lod_* intrinsic; confirm that this
- // argument selects the array layer rather than a mip level.
- template <typename T, typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf1DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_1D(i);
- const auto channelOrder = __ockl_image_channel_order_1D(i);
- const int pixelX = __hipGetPixelAddr(x, channelType, channelOrder);
- *data = __hipMapFrom<T>(__ockl_image_load_lod_1D(i, pixelX, layer));
- }
- // Stores one element into a layered 1D surface.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - value to store; converted to float4::Native_vec_ with __hipMapTo first.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x       - coordinate; first passed through __hipGetPixelAddr.
- //   layer   - forwarded to __ockl_image_store_lod_1D ahead of the value.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf1DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_1D(i);
- const auto channelOrder = __ockl_image_channel_order_1D(i);
- x = __hipGetPixelAddr(x, channelType, channelOrder);
- auto packed = __hipMapTo<float4::Native_vec_>(data);
- __ockl_image_store_lod_1D(i, x, layer, packed);
- }
- // Loads one element from a layered 2D surface into *data.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - output pointer; receives the loaded value converted with __hipMapFrom<T>.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr.
- //   layer   - forwarded as the last argument of __ockl_image_load_lod_2D.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf2DLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_2D(i);
- const auto channelOrder = __ockl_image_channel_order_2D(i);
- x = __hipGetPixelAddr(x, channelType, channelOrder);
- auto texel = __ockl_image_load_lod_2D(i, int2(x, y).data, layer);
- *data = __hipMapFrom<T>(texel);
- }
- // Stores one element into a layered 2D surface.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - value to store; converted to float4::Native_vec_ with __hipMapTo first.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr.
- //   layer   - forwarded to __ockl_image_store_lod_2D ahead of the value.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surf2DLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const int px = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
- auto packed = __hipMapTo<float4::Native_vec_>(data);
- __ockl_image_store_lod_2D(i, int2(px, y).data, layer, packed);
- }
- // Loads one element from a cubemap surface into *data.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - output pointer; receives the loaded value converted with __hipMapFrom<T>.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr using the
- //             2D channel data type / channel order queries.
- //   face    - forwarded as the last argument of __ockl_image_load_CM.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surfCubemapread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const int px = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
- auto texel = __ockl_image_load_CM(i, int2(px, y).data, face);
- *data = __hipMapFrom<T>(texel);
- }
- // Stores one element into a cubemap surface.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - value to store; converted to float4::Native_vec_ with __hipMapTo first.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr using the
- //             2D channel data type / channel order queries.
- //   face    - forwarded to __ockl_image_store_CM ahead of the value.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surfCubemapwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_2D(i);
- const auto channelOrder = __ockl_image_channel_order_2D(i);
- x = __hipGetPixelAddr(x, channelType, channelOrder);
- auto packed = __hipMapTo<float4::Native_vec_>(data);
- __ockl_image_store_CM(i, int2(x, y).data, face, packed);
- }
- // Loads one element from a layered cubemap surface into *data.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - output pointer; receives the loaded value converted with __hipMapFrom<T>.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr using the
- //             2D channel data type / channel order queries.
- //   face, layer - forwarded, in that order, to __ockl_image_load_lod_CM.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surfCubemapLayeredread(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
- int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- const auto channelType = __ockl_image_channel_data_type_2D(i);
- const auto channelOrder = __ockl_image_channel_order_2D(i);
- x = __hipGetPixelAddr(x, channelType, channelOrder);
- auto texel = __ockl_image_load_lod_CM(i, int2(x, y).data, face, layer);
- *data = __hipMapFrom<T>(texel);
- }
- // Stores one element into a layered cubemap surface (pointer form, original signature).
- // Fix: the body used to hand the pointer `data` itself to __hipMapTo, so the pointer rather
- // than the pointed-to element was converted; every other surf*write function in this file
- // converts the element value. The element is now read through the pointer.
- //   T       - element type; must satisfy __hip_is_tex_surf_channel_type<T>.
- //   data    - pointer to the value to store; must be dereferenceable.
- //   surfObj - surface handle; __HIP_SURFACE_OBJECT_PARAMETERS_INIT (defined elsewhere) is
- //             expected to derive the image descriptor `i` used below from it.
- //   x, y    - coordinates; x is first passed through __hipGetPixelAddr using the
- //             2D channel data type / channel order queries.
- //   face, layer - forwarded, in that order, to __ockl_image_store_lod_CM.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfaceObject_t surfObj, int x, int y, int face,
- int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
- auto tmp = __hipMapTo<float4::Native_vec_>(*data);
- __ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp);
- }
- // By-value form, added so that this function can be called like the other surf*write
- // functions in this file (which all take `T data`). Same behaviour as the pointer form.
- template <
- typename T,
- typename std::enable_if<__hip_is_tex_surf_channel_type<T>::value>::type* = nullptr>
- static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T data, hipSurfaceObject_t surfObj, int x, int y, int face,
- int layer) {
- __HIP_SURFACE_OBJECT_PARAMETERS_INIT
- x = __hipGetPixelAddr(x, __ockl_image_channel_data_type_2D(i), __ockl_image_channel_order_2D(i));
- auto tmp = __hipMapTo<float4::Native_vec_>(data);
- __ockl_image_store_lod_CM(i, int2(x, y).data, face, layer, tmp);
- }
- #endif
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
- #if !defined(__HIPCC_RTC__)
- #include "hip/amd_detail/amd_hip_vector_types.h"
- #endif
- #if defined(__HIPCC_RTC__)
- #define __HOST_DEVICE__ __device__
- #else
- #define __HOST_DEVICE__ __host__ __device__
- // TODO: Clang has a bug that allows device functions to call std functions
- // once those functions have been introduced into the global namespace by a
- // using-declaration. math.h may be included instead once this bug is fixed.
- #if __cplusplus
- #include <cmath>
- #else
- #include "math.h"
- #endif
- #endif // !defined(__HIPCC_RTC__)
- #if __cplusplus
- // Generates unary operator- for `type`: returns a value whose x and y members are the
- // negations of the operand's x and y.
- #define COMPLEX_NEG_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type operator-(const type& op) { \
- type negated; \
- negated.y = -op.y; \
- negated.x = -op.x; \
- return negated; \
- }
- // Generates operator== for `type`: true only when both the x members and the y members
- // compare equal with ==.
- #define COMPLEX_EQ_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline bool operator==(const type& lhs, const type& rhs) { \
- if (!(lhs.x == rhs.x)) { \
- return false; \
- } \
- return lhs.y == rhs.y; \
- }
- // Generates operator!= for `type` as the logical negation of `lhs == rhs`.
- #define COMPLEX_NE_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline bool operator!=(const type& lhs, const type& rhs) { \
- if (lhs == rhs) { \
- return false; \
- } \
- return true; \
- }
- // Generates binary operator+ for `type`: member-wise sum of x and y.
- #define COMPLEX_ADD_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type operator+(const type& lhs, const type& rhs) { \
- type sum; \
- sum.y = lhs.y + rhs.y; \
- sum.x = lhs.x + rhs.x; \
- return sum; \
- }
- // Generates binary operator- for `type`: member-wise difference of x and y.
- #define COMPLEX_SUB_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type operator-(const type& lhs, const type& rhs) { \
- type difference; \
- difference.y = lhs.y - rhs.y; \
- difference.x = lhs.x - rhs.x; \
- return difference; \
- }
- // Generates binary operator* for `type` using the complex product with x as the real
- // part and y as the imaginary part:
- //   x = lhs.x*rhs.x - lhs.y*rhs.y,  y = lhs.x*rhs.y + lhs.y*rhs.x.
- #define COMPLEX_MUL_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type operator*(const type& lhs, const type& rhs) { \
- type product; \
- product.y = lhs.x * rhs.y + lhs.y * rhs.x; \
- product.x = lhs.x * rhs.x - lhs.y * rhs.y; \
- return product; \
- }
- // Generates binary operator/ for `type`: multiplies lhs by the conjugate of rhs and
- // divides each member by rhs.x*rhs.x + rhs.y*rhs.y. The divisor is not checked for zero
- // and the squared terms are formed directly, without rescaling.
- #define COMPLEX_DIV_OP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type operator/(const type& lhs, const type& rhs) { \
- type quotient; \
- quotient.x = (lhs.x * rhs.x + lhs.y * rhs.y); \
- quotient.x = quotient.x / (rhs.x * rhs.x + rhs.y * rhs.y); \
- quotient.y = (rhs.x * lhs.y - lhs.x * rhs.y); \
- quotient.y = quotient.y / (rhs.x * rhs.x + rhs.y * rhs.y); \
- return quotient; \
- }
- // Generates operator+= for `type`: adds rhs to lhs member-wise and returns lhs.
- #define COMPLEX_ADD_PREOP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type& operator+=(type& lhs, const type& rhs) { \
- lhs.x = lhs.x + rhs.x; \
- lhs.y = lhs.y + rhs.y; \
- return lhs; \
- }
- // Generates operator-= for `type`: subtracts rhs from lhs member-wise and returns lhs.
- #define COMPLEX_SUB_PREOP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type& operator-=(type& lhs, const type& rhs) { \
- lhs.x = lhs.x - rhs.x; \
- lhs.y = lhs.y - rhs.y; \
- return lhs; \
- }
- // Generates operator*= for `type`: replaces lhs with the complex product lhs * rhs
- // (x is the real part, y the imaginary part) and returns lhs.
- // Both operands are copied before lhs is written. Previously only lhs was copied, so for
- // an aliased call such as `z *= z` the second assignment read rhs.x after lhs.x had
- // already been overwritten and produced a wrong imaginary part.
- #define COMPLEX_MUL_PREOP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type& operator*=(type& lhs, const type& rhs) { \
- const type left{lhs}; \
- const type right{rhs}; \
- lhs.x = right.x * left.x - right.y * left.y; \
- lhs.y = right.y * left.x + right.x * left.y; \
- return lhs; \
- }
- // Generates operator/= for `type`: replaces lhs with lhs / rhs and returns lhs.
- // The quotient is built in a temporary and assigned at the end, so lhs is only
- // overwritten after both members have been computed. The divisor is not checked for zero.
- #define COMPLEX_DIV_PREOP_OVERLOAD(type) \
- __HOST_DEVICE__ static inline type& operator/=(type& lhs, const type& rhs) { \
- type quotient; \
- quotient.y = (lhs.y * rhs.x - lhs.x * rhs.y) / (rhs.x*rhs.x + rhs.y*rhs.y); \
- quotient.x = (lhs.x*rhs.x + lhs.y * rhs.y) / (rhs.x*rhs.x + rhs.y*rhs.y); \
- lhs = quotient; \
- return lhs; \
- }
- // Generates operator*(type, type1): scales both members of the complex operand by the
- // scalar `rhs`. Only the (complex, scalar) operand order is generated.
- #define COMPLEX_SCALAR_PRODUCT(type, type1) \
- __HOST_DEVICE__ static inline type operator*(const type& lhs, type1 rhs) { \
- type scaled; \
- scaled.y = lhs.y * rhs; \
- scaled.x = lhs.x * rhs; \
- return scaled; \
- }
- #endif
- typedef float2 hipFloatComplex;
- // Returns the x member of z, which this header treats as the real part.
- __HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) {
- const float re = z.x;
- return re;
- }
- // Returns the y member of z, which this header treats as the imaginary part.
- __HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) {
- const float im = z.y;
- return im;
- }
- // Builds a hipFloatComplex with x = a and y = b.
- __HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) {
- hipFloatComplex result;
- result.y = b;
- result.x = a;
- return result;
- }
- // Returns the complex conjugate of z: x is kept, y is negated.
- __HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) {
- hipFloatComplex conj;
- conj.y = -z.y;
- conj.x = z.x;
- return conj;
- }
- // Returns the squared magnitude of z, x*x + y*y (no square root is taken).
- __HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) {
- const float magnitudeSq = z.x * z.x + z.y * z.y;
- return magnitudeSq;
- }
- // Returns p + q, computed member-wise.
- __HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) {
- const float re = p.x + q.x;
- const float im = p.y + q.y;
- return make_hipFloatComplex(re, im);
- }
- // Returns p - q, computed member-wise.
- __HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) {
- const float re = p.x - q.x;
- const float im = p.y - q.y;
- return make_hipFloatComplex(re, im);
- }
- // Returns the complex product p * q:
- //   real = p.x*q.x - p.y*q.y,  imaginary = p.y*q.x + p.x*q.y.
- __HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) {
- const float re = p.x * q.x - p.y * q.y;
- const float im = p.y * q.x + p.x * q.y;
- return make_hipFloatComplex(re, im);
- }
- // Returns the complex quotient p / q: p times the conjugate of q, divided by the squared
- // magnitude of q (hipCsqabsf). The divisor is not checked for zero.
- __HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) {
- const float denom = hipCsqabsf(q);
- hipFloatComplex quotient;
- quotient.y = (p.y * q.x - p.x * q.y) / denom;
- quotient.x = (p.x * q.x + p.y * q.y) / denom;
- return quotient;
- }
- // Returns the magnitude of z as sqrtf of the squared magnitude (hipCsqabsf).
- // The squares are formed before the root, so the intermediate can overflow or underflow
- // even when the magnitude itself is representable.
- __HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) {
- const float magnitudeSq = hipCsqabsf(z);
- return sqrtf(magnitudeSq);
- }
- typedef double2 hipDoubleComplex;
- // Returns the x member of z, which this header treats as the real part.
- __HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) {
- const double re = z.x;
- return re;
- }
- // Returns the y member of z, which this header treats as the imaginary part.
- __HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) {
- const double im = z.y;
- return im;
- }
- // Builds a hipDoubleComplex with x = a and y = b.
- __HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) {
- hipDoubleComplex result;
- result.y = b;
- result.x = a;
- return result;
- }
- // Returns the complex conjugate of z: x is kept, y is negated.
- __HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) {
- hipDoubleComplex conj;
- conj.y = -z.y;
- conj.x = z.x;
- return conj;
- }
- // Returns the squared magnitude of z, x*x + y*y (no square root is taken).
- __HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) {
- const double magnitudeSq = z.x * z.x + z.y * z.y;
- return magnitudeSq;
- }
- // Returns p + q, computed member-wise.
- __HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) {
- const double re = p.x + q.x;
- const double im = p.y + q.y;
- return make_hipDoubleComplex(re, im);
- }
- // Returns p - q, computed member-wise.
- __HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) {
- const double re = p.x - q.x;
- const double im = p.y - q.y;
- return make_hipDoubleComplex(re, im);
- }
- // Returns the complex product p * q:
- //   real = p.x*q.x - p.y*q.y,  imaginary = p.y*q.x + p.x*q.y.
- __HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) {
- const double re = p.x * q.x - p.y * q.y;
- const double im = p.y * q.x + p.x * q.y;
- return make_hipDoubleComplex(re, im);
- }
- // Returns the complex quotient p / q: p times the conjugate of q, divided by the squared
- // magnitude of q (hipCsqabs). The divisor is not checked for zero.
- __HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) {
- const double denom = hipCsqabs(q);
- hipDoubleComplex quotient;
- quotient.y = (p.y * q.x - p.x * q.y) / denom;
- quotient.x = (p.x * q.x + p.y * q.y) / denom;
- return quotient;
- }
- // Returns the magnitude of z as sqrt of the squared magnitude (hipCsqabs).
- // The squares are formed before the root, so the intermediate can overflow or underflow
- // even when the magnitude itself is representable.
- __HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) {
- const double magnitudeSq = hipCsqabs(z);
- return sqrt(magnitudeSq);
- }
- // C++ only: instantiate the operator-generating macros defined earlier in this header
- // for both complex typedefs.
- #if __cplusplus
- // hipFloatComplex: unary minus, ==, !=, +, -, *, / and the compound forms += -= *= /=.
- COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_NE_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex)
- COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex)
- COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex)
- COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex)
- COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex)
- // hipFloatComplex * scalar, one overload per listed arithmetic type. Each product is
- // stored back into the float members of the result.
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long)
- COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long)
- // hipDoubleComplex: the same operator set as above.
- COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex)
- COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex)
- COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex)
- COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex)
- COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex)
- // hipDoubleComplex * scalar, one overload per listed arithmetic type.
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long)
- COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long)
- #endif
- typedef hipFloatComplex hipComplex;
- // Builds a hipComplex (an alias of hipFloatComplex) with real part x and imaginary
- // part y by delegating to make_hipFloatComplex.
- __HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) {
- hipComplex result = make_hipFloatComplex(x, y);
- return result;
- }
- // Converts a double-precision complex value to single precision by casting each member
- // to float.
- __HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) {
- const float re = (float)z.x;
- const float im = (float)z.y;
- return make_hipFloatComplex(re, im);
- }
- // Converts a single-precision complex value to double precision by casting each member
- // to double.
- __HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) {
- const double re = (double)z.x;
- const double im = (double)z.y;
- return make_hipDoubleComplex(re, im);
- }
- // Returns p * q + r for single-precision complex values.
- // Each component is accumulated in two steps, starting from r; the order of the
- // floating-point operations within each component is kept as written.
- __HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) {
- float re = (p.x * q.x) + r.x;
- re = -(p.y * q.y) + re;
- float im = (q.x * p.y) + r.y;
- im = (p.x * q.y) + im;
- return make_hipComplex(re, im);
- }
- // Returns p * q + r for double-precision complex values.
- // Each component is accumulated in two steps, starting from r; the order of the
- // floating-point operations within each component is kept as written.
- __HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q,
- hipDoubleComplex r) {
- double re = (p.x * q.x) + r.x;
- re = -(p.y * q.y) + re;
- double im = (q.x * p.y) + r.y;
- im = (p.x * q.y) + im;
- return make_hipDoubleComplex(re, im);
- }
- #endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef AMD_HIP_MATH_CONSTANTS_H
- #define AMD_HIP_MATH_CONSTANTS_H
- // single precision constants
- // Special values built from raw 32-bit patterns through __int_as_float (declared
- // elsewhere; presumably a bit-for-bit reinterpretation -- confirm). Read as IEEE-754
- // binary32 the patterns are, in order: +infinity, a quiet NaN, the smallest positive
- // subnormal, the largest finite value, and negative zero.
- #define HIP_INF_F __int_as_float(0x7f800000U)
- #define HIP_NAN_F __int_as_float(0x7fffffffU)
- #define HIP_MIN_DENORM_F __int_as_float(0x00000001U)
- #define HIP_MAX_NORMAL_F __int_as_float(0x7f7fffffU)
- #define HIP_NEG_ZERO_F __int_as_float(0x80000000U)
- #define HIP_ZERO_F 0.0F
- #define HIP_ONE_F 1.0F
- // sqrt(1/2). The _HI/_LO pair is presumably a head/tail split whose sum approximates
- // the constant more closely than a single float -- confirm.
- #define HIP_SQRT_HALF_F 0.707106781F
- #define HIP_SQRT_HALF_HI_F 0.707106781F
- #define HIP_SQRT_HALF_LO_F 1.210161749e-08F
- #define HIP_SQRT_TWO_F 1.414213562F
- #define HIP_THIRD_F 0.333333333F
- // Multiples and reciprocals of pi: pi/4, pi/2, 3*pi/4, 2/pi, sqrt(2/pi), pi.
- #define HIP_PIO4_F 0.785398163F
- #define HIP_PIO2_F 1.570796327F
- #define HIP_3PIO4_F 2.356194490F
- #define HIP_2_OVER_PI_F 0.636619772F
- #define HIP_SQRT_2_OVER_PI_F 0.797884561F
- #define HIP_PI_F 3.141592654F
- // Logarithm constants: log2(e), log2(10), log10(2), log10(e), ln(2), ln(10), ln(pi).
- #define HIP_L2E_F 1.442695041F
- #define HIP_L2T_F 3.321928094F
- #define HIP_LG2_F 0.301029996F
- #define HIP_LGE_F 0.434294482F
- #define HIP_LN2_F 0.693147181F
- #define HIP_LNT_F 2.302585093F
- #define HIP_LNPI_F 1.144729886F
- // Powers of two and the largest finite float, written as decimal literals.
- #define HIP_TWO_TO_M126_F 1.175494351e-38F
- #define HIP_TWO_TO_126_F 8.507059173e37F
- #define HIP_NORM_HUGE_F 3.402823466e38F
- #define HIP_TWO_TO_23_F 8388608.0F
- #define HIP_TWO_TO_24_F 16777216.0F
- #define HIP_TWO_TO_31_F 2147483648.0F
- #define HIP_TWO_TO_32_F 4294967296.0F
- // HIP_REMQUO_MASK_F evaluates to a mask of the low HIP_REMQUO_BITS_F bits (0x7 for 3).
- #define HIP_REMQUO_BITS_F 3U
- #define HIP_REMQUO_MASK_F (~((~0U)<<HIP_REMQUO_BITS_F))
- // NOTE(review): presumably the argument magnitude beyond which single-precision
- // trigonometric range reduction loses accuracy -- confirm against its users.
- #define HIP_TRIG_PLOSS_F 105615.0F
- // double precision constants
- // Special values built from raw 64-bit patterns through __longlong_as_double (declared
- // elsewhere; presumably a bit-for-bit reinterpretation -- confirm). Read as IEEE-754
- // binary64 the patterns are, in order: +infinity, a quiet NaN with the sign bit set,
- // negative zero, and the smallest positive subnormal.
- #define HIP_INF __longlong_as_double(0x7ff0000000000000ULL)
- #define HIP_NAN __longlong_as_double(0xfff8000000000000ULL)
- #define HIP_NEG_ZERO __longlong_as_double(0x8000000000000000ULL)
- #define HIP_MIN_DENORM __longlong_as_double(0x0000000000000001ULL)
- #define HIP_ZERO 0.0
- #define HIP_ONE 1.0
- // Throughout this section a NAME_HI / NAME_LO pair is presumably a head/tail split
- // whose sum approximates NAME beyond double precision -- confirm.
- #define HIP_SQRT_TWO 1.4142135623730951e+0
- #define HIP_SQRT_HALF 7.0710678118654757e-1
- #define HIP_SQRT_HALF_HI 7.0710678118654757e-1
- #define HIP_SQRT_HALF_LO (-4.8336466567264567e-17)
- #define HIP_THIRD 3.3333333333333333e-1
- #define HIP_TWOTHIRD 6.6666666666666667e-1
- // Multiples and reciprocals of pi: pi/4, pi/2, 3*pi/4, 2/pi, pi.
- #define HIP_PIO4 7.8539816339744828e-1
- #define HIP_PIO4_HI 7.8539816339744828e-1
- #define HIP_PIO4_LO 3.0616169978683830e-17
- #define HIP_PIO2 1.5707963267948966e+0
- #define HIP_PIO2_HI 1.5707963267948966e+0
- #define HIP_PIO2_LO 6.1232339957367660e-17
- #define HIP_3PIO4 2.3561944901923448e+0
- #define HIP_2_OVER_PI 6.3661977236758138e-1
- #define HIP_PI 3.1415926535897931e+0
- #define HIP_PI_HI 3.1415926535897931e+0
- #define HIP_PI_LO 1.2246467991473532e-16
- // Square roots involving pi: sqrt(2*pi), sqrt(pi/2), sqrt(2/pi).
- #define HIP_SQRT_2PI 2.5066282746310007e+0
- #define HIP_SQRT_2PI_HI 2.5066282746310007e+0
- #define HIP_SQRT_2PI_LO (-1.8328579980459167e-16)
- #define HIP_SQRT_PIO2 1.2533141373155003e+0
- #define HIP_SQRT_PIO2_HI 1.2533141373155003e+0
- #define HIP_SQRT_PIO2_LO (-9.1642899902295834e-17)
- #define HIP_SQRT_2OPI 7.9788456080286536e-1
- // Logarithm constants: log2(e), log2(10), log10(2), log10(e), ln(2), ln(10), ln(pi).
- #define HIP_L2E 1.4426950408889634e+0
- #define HIP_L2E_HI 1.4426950408889634e+0
- #define HIP_L2E_LO 2.0355273740931033e-17
- #define HIP_L2T 3.3219280948873622e+0
- #define HIP_LG2 3.0102999566398120e-1
- #define HIP_LG2_HI 3.0102999566398120e-1
- #define HIP_LG2_LO (-2.8037281277851704e-18)
- #define HIP_LGE 4.3429448190325182e-1
- #define HIP_LGE_HI 4.3429448190325182e-1
- #define HIP_LGE_LO 1.09831965021676510e-17
- #define HIP_LN2 6.9314718055994529e-1
- #define HIP_LN2_HI 6.9314718055994529e-1
- #define HIP_LN2_LO 2.3190468138462996e-17
- #define HIP_LNT 2.3025850929940459e+0
- #define HIP_LNT_HI 2.3025850929940459e+0
- #define HIP_LNT_LO (-2.1707562233822494e-16)
- #define HIP_LNPI 1.1447298858494002e+0
- // ln(2) and log10(2) multiplied by the integer in the name (1024, 1025, 1075).
- #define HIP_LN2_X_1024 7.0978271289338397e+2
- #define HIP_LN2_X_1025 7.1047586007394398e+2
- #define HIP_LN2_X_1075 7.4513321910194122e+2
- #define HIP_LG2_X_1024 3.0825471555991675e+2
- #define HIP_LG2_X_1075 3.2360724533877976e+2
- // Powers of two written as decimal literals (M = negative exponent).
- #define HIP_TWO_TO_23 8388608.0
- #define HIP_TWO_TO_52 4503599627370496.0
- #define HIP_TWO_TO_53 9007199254740992.0
- #define HIP_TWO_TO_54 18014398509481984.0
- #define HIP_TWO_TO_M54 5.5511151231257827e-17
- #define HIP_TWO_TO_M1022 2.22507385850720140e-308
- // NOTE(review): presumably the argument magnitude beyond which double-precision
- // trigonometric range reduction loses accuracy -- confirm against its users.
- #define HIP_TRIG_PLOSS 2147483648.0
- // Equals 1.5 * 2^52. NOTE(review): presumably the additive constant of a double-to-int
- // conversion trick -- confirm against its users.
- #define HIP_DBL2INT_CVT 6755399441055744.0
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if !defined(__HIPCC_RTC__)
- #include "host_defines.h"
- #include "amd_hip_vector_types.h" // For Native_vec_
- #endif
- #if defined(__cplusplus)
- extern "C" {
- #endif
- // DOT FUNCTIONS
- // Prototypes only; the definitions are not part of this header. They are declared only
- // when compiling with clang in HIP mode, and all carry __attribute__((const)).
- // NOTE(review): the parameters are presumably (a, b, accumulator, saturate) -- confirm
- // against the OCKL documentation.
- #if defined(__clang__) && defined(__HIP__)
- // Signed dot product over two 2 x short vectors.
- __device__
- __attribute__((const))
- int __ockl_sdot2(
- HIP_vector_base<short, 2>::Native_vec_,
- HIP_vector_base<short, 2>::Native_vec_,
- int, bool);
- // Unsigned dot product over two 2 x unsigned short vectors.
- __device__
- __attribute__((const))
- unsigned int __ockl_udot2(
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- unsigned int, bool);
- // Signed dot product over two 4 x char vectors.
- __device__
- __attribute__((const))
- int __ockl_sdot4(
- HIP_vector_base<char, 4>::Native_vec_,
- HIP_vector_base<char, 4>::Native_vec_,
- int, bool);
- // Unsigned dot product over two 4 x unsigned char vectors.
- __device__
- __attribute__((const))
- unsigned int __ockl_udot4(
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- unsigned int, bool);
- // The 8-way variants take their vector operands as plain int / unsigned int
- // (presumably eight packed 4-bit lanes each -- confirm).
- __device__
- __attribute__((const))
- int __ockl_sdot8(int, int, int, bool);
- __device__
- __attribute__((const))
- unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool);
- #endif
- #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- // BEGIN FLOAT
- __device__
- __attribute__((const))
- float __ocml_acos_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_acosh_f32(float);
- __device__
- __attribute__((const))
- float __ocml_asin_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_asinh_f32(float);
- __device__
- __attribute__((const))
- float __ocml_atan2_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_atan_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_atanh_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_cbrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_ceil_f32(float);
- // Prototype of the OCML single-precision copysign routine (defined elsewhere).
- // The __device__ qualifier was written twice on this declaration; the duplicate is
- // removed so it matches the neighbouring prototypes.
- __device__
- __attribute__((const))
- float __ocml_copysign_f32(float, float);
- __device__
- float __ocml_cos_f32(float);
- __device__
- float __ocml_native_cos_f32(float);
- // Prototype of the OCML single-precision cosh routine (defined elsewhere).
- // The __device__ qualifier was written twice on this declaration; the duplicate is
- // removed so it matches the neighbouring prototypes.
- __device__
- __attribute__((pure))
- float __ocml_cosh_f32(float);
- __device__
- float __ocml_cospi_f32(float);
- __device__
- float __ocml_i0_f32(float);
- __device__
- float __ocml_i1_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfc_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfcinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfcx_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erf_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_exp10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp2_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_exp_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_expm1_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fabs_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fdim_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_floor_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fma_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fmax_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_fmin_f32(float, float);
- // Prototype of the OCML single-precision fmod routine (defined elsewhere).
- // The __device__ qualifier was written twice on this declaration; the duplicate is
- // removed so it matches the neighbouring prototypes.
- __device__
- __attribute__((const))
- float __ocml_fmod_f32(float, float);
- __device__
- float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- float __ocml_hypot_f32(float, float);
- __device__
- __attribute__((const))
- int __ocml_ilogb_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isfinite_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isinf_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isnan_f32(float);
- __device__
- float __ocml_j0_f32(float);
- __device__
- float __ocml_j1_f32(float);
- __device__
- __attribute__((const))
- float __ocml_ldexp_f32(float, int);
- __device__
- float __ocml_lgamma_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log1p_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log2_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log2_f32(float);
- __device__
- __attribute__((const))
- float __ocml_logb_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log_f32(float);
- __device__
- float __ocml_modf_f32(float, __attribute__((address_space(5))) float*);
- __device__
- __attribute__((const))
- float __ocml_nearbyint_f32(float);
- __device__
- __attribute__((const))
- float __ocml_nextafter_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_len3_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_len4_f32(float, float, float, float);
- __device__
- __attribute__((pure))
- float __ocml_ncdf_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_ncdfinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_pow_f32(float, float);
- __device__
- __attribute__((pure))
- float __ocml_pown_f32(float, int);
- __device__
- __attribute__((pure))
- float __ocml_rcbrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_remainder_f32(float, float);
- __device__
- float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- float __ocml_rhypot_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_rint_f32(float);
- __device__
- __attribute__((const))
- float __ocml_rlen3_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_rlen4_f32(float, float, float, float);
- __device__
- __attribute__((const))
- float __ocml_round_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_rsqrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_scalb_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_scalbn_f32(float, int);
- __device__
- __attribute__((const))
- int __ocml_signbit_f32(float);
- __device__
- float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*);
- __device__
- float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*);
- __device__
- float __ocml_sin_f32(float);
- __device__
- float __ocml_native_sin_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_sinh_f32(float);
- __device__
- float __ocml_sinpi_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_native_sqrt_f32(float);
- __device__
- float __ocml_tan_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_tanh_f32(float);
- __device__
- float __ocml_tgamma_f32(float);
- __device__
- __attribute__((const))
- float __ocml_trunc_f32(float);
- __device__
- float __ocml_y0_f32(float);
- __device__
- float __ocml_y1_f32(float);
- // BEGIN INTRINSICS
- __device__
- __attribute__((const))
- float __ocml_add_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rte_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtn_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtp_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtz_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fma_rte_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtn_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtp_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtz_f32(float, float, float);
- // END INTRINSICS
- // END FLOAT
- // BEGIN DOUBLE
- __device__
- __attribute__((const))
- double __ocml_acos_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_acosh_f64(double);
- // Double-precision `__ocml_*_f64` entry points. These are declarations only; no
- // definitions appear in this header.
- // Attribute key (GCC/Clang semantics):
- //   const - the result depends only on the argument values.
- //   pure  - no side effects, but the function may read global state.
- // Declarations that carry neither attribute make no such promise to the optimizer.
- // NOTE(review): the `address_space(5)` pointer parameters are presumably output
- // parameters - confirm against the device-library documentation.
- __device__
- __attribute__((const))
- double __ocml_asin_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_asinh_f64(double);
- __device__
- __attribute__((const))
- double __ocml_atan2_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_atan_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_atanh_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_cbrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_ceil_f64(double);
- __device__
- __attribute__((const))
- double __ocml_copysign_f64(double, double);
- __device__
- double __ocml_cos_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_cosh_f64(double);
- __device__
- double __ocml_cospi_f64(double);
- __device__
- double __ocml_i0_f64(double);
- __device__
- double __ocml_i1_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfc_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfcinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfcx_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erf_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp10_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp2_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_expm1_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fabs_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fdim_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_floor_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fma_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fmax_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_fmin_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_fmod_f64(double, double);
- __device__
- double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- double __ocml_hypot_f64(double, double);
- __device__
- __attribute__((const))
- int __ocml_ilogb_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isfinite_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isinf_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isnan_f64(double);
- __device__
- double __ocml_j0_f64(double);
- __device__
- double __ocml_j1_f64(double);
- __device__
- __attribute__((const))
- double __ocml_ldexp_f64(double, int);
- __device__
- double __ocml_lgamma_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log10_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log1p_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log2_f64(double);
- __device__
- __attribute__((const))
- double __ocml_logb_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log_f64(double);
- __device__
- double __ocml_modf_f64(double, __attribute__((address_space(5))) double*);
- __device__
- __attribute__((const))
- double __ocml_nearbyint_f64(double);
- __device__
- __attribute__((const))
- double __ocml_nextafter_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_len3_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_len4_f64(double, double, double, double);
- __device__
- __attribute__((pure))
- double __ocml_ncdf_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_ncdfinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_pow_f64(double, double);
- __device__
- __attribute__((pure))
- double __ocml_pown_f64(double, int);
- __device__
- __attribute__((pure))
- double __ocml_rcbrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_remainder_f64(double, double);
- __device__
- double __ocml_remquo_f64(
- double, double, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- double __ocml_rhypot_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_rint_f64(double);
- __device__
- __attribute__((const))
- double __ocml_rlen3_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_rlen4_f64(double, double, double, double);
- __device__
- __attribute__((const))
- double __ocml_round_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_rsqrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_scalb_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_scalbn_f64(double, int);
- __device__
- __attribute__((const))
- int __ocml_signbit_f64(double);
- __device__
- double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*);
- __device__
- double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*);
- __device__
- double __ocml_sin_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_sinh_f64(double);
- __device__
- double __ocml_sinpi_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_f64(double);
- __device__
- double __ocml_tan_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_tanh_f64(double);
- __device__
- double __ocml_tgamma_f64(double);
- __device__
- __attribute__((const))
- double __ocml_trunc_f64(double);
- __device__
- double __ocml_y0_f64(double);
- __device__
- double __ocml_y1_f64(double);
- // BEGIN INTRINSICS
- // Double-precision add / sub / mul / div / sqrt / fma, each declared once per
- // suffix (_rte, _rtn, _rtp, _rtz). Every entry is marked `const`.
- // NOTE(review): the suffix presumably selects the rounding mode (rte nearest-even,
- // rtn toward -inf, rtp toward +inf, rtz toward zero). The rtn/rtp/rtz reading
- // matches how the __*_rd / __*_ru / __*_rz wrappers later in this file pick the
- // cvtrtn / cvtrtp / cvtrtz entries; confirm rte against the device-library docs.
- __device__
- __attribute__((const))
- double __ocml_add_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rte_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtn_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtp_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtz_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fma_rte_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtn_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtp_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtz_f64(double, double, double);
- // END INTRINSICS
- // END DOUBLE
- #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- #if defined(__cplusplus)
- } // extern "C"
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- /**
- * @file amd_detail/device_library_decls.h
- * @brief Contains declarations for types and functions in device library.
- * Uses int64_t and uint64_t instead of long, long long, unsigned
- * long and unsigned long long types for device library API
- * declarations.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H
- #if !defined(__HIPCC_RTC__)
- #include "hip/amd_detail/host_defines.h"
- #endif
- // Short aliases for the unsigned integer types used by the declarations below.
- // Note that `ulong` is `unsigned long`, whose width is platform-dependent.
- typedef unsigned char uchar;
- typedef unsigned short ushort;
- typedef unsigned int uint;
- typedef unsigned long ulong;
- typedef unsigned long long ullong;
- // Device-library entry points with C linkage. Declarations only.
- // `const` = result depends only on the arguments; `convergent` is the Clang
- // attribute that stops the optimizer from changing the control dependencies
- // of calls to the function.
- extern "C" __device__ __attribute__((const)) bool __ockl_wfany_i32(int);
- extern "C" __device__ __attribute__((const)) bool __ockl_wfall_i32(int);
- extern "C" __device__ uint __ockl_activelane_u32(void);
- extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint);
- extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int);
- extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint);
- extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int);
- extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint);
- extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar);
- extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort);
- extern "C" __device__ __attribute__((const)) uint __ockl_clz_u32(uint);
- extern "C" __device__ __attribute__((const)) uint64_t __ockl_clz_u64(uint64_t);
- // Single-precision rounding helpers and explicitly rounded conversions
- // (cvtrtn / cvtrtp / cvtrtz variants per source and destination type).
- extern "C" __device__ __attribute__((const)) float __ocml_floor_f32(float);
- extern "C" __device__ __attribute__((const)) float __ocml_rint_f32(float);
- extern "C" __device__ __attribute__((const)) float __ocml_ceil_f32(float);
- extern "C" __device__ __attribute__((const)) float __ocml_trunc_f32(float);
- extern "C" __device__ __attribute__((const)) float __ocml_fmin_f32(float, float);
- extern "C" __device__ __attribute__((const)) float __ocml_fmax_f32(float, float);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_f64(double);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_f64(double);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_f64(double);
- extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtn_f16_f32(float);
- extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtp_f16_f32(float);
- extern "C" __device__ __attribute__((const)) _Float16 __ocml_cvtrtz_f16_f32(float);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s32(int);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s32(int);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s32(int);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u32(uint32_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u32(uint32_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u32(uint32_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_s64(int64_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_s64(int64_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_s64(int64_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtn_f32_u64(uint64_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtp_f32_u64(uint64_t);
- extern "C" __device__ __attribute__((const)) float __ocml_cvtrtz_f32_u64(uint64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_s64(int64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_s64(int64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_s64(int64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtn_f64_u64(uint64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtp_f64_u64(uint64_t);
- extern "C" __device__ __attribute__((const)) double __ocml_cvtrtz_f64_u64(uint64_t);
- // Grid / multi-grid / workgroup entry points. The sync, barrier and reduction
- // entries are all declared `convergent`.
- extern "C" __device__ __attribute__((convergent)) void __ockl_gws_init(uint nwm1, uint rid);
- extern "C" __device__ __attribute__((convergent)) void __ockl_gws_barrier(uint nwm1, uint rid);
- extern "C" __device__ __attribute__((const)) uint32_t __ockl_lane_u32();
- extern "C" __device__ __attribute__((const)) int __ockl_grid_is_valid(void);
- extern "C" __device__ __attribute__((convergent)) void __ockl_grid_sync(void);
- extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_num_grids(void);
- extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_grid_rank(void);
- extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_size(void);
- extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank(void);
- extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void);
- extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void);
- extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float);
- extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_add_i32(int a);
- extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_and_i32(int a);
- extern "C" __device__ __attribute__((convergent)) int __ockl_wgred_or_i32(int a);
- // fprintf-to-stderr message building. Each call takes and/or returns a 64-bit
- // message descriptor; `is_last` is a flag argument on the append calls.
- // NOTE(review): presumably `is_last` marks the final fragment of a message - confirm.
- extern "C" __device__ uint64_t __ockl_fprintf_stderr_begin();
- extern "C" __device__ uint64_t __ockl_fprintf_append_args(uint64_t msg_desc, uint32_t num_args,
- uint64_t value0, uint64_t value1,
- uint64_t value2, uint64_t value3,
- uint64_t value4, uint64_t value5,
- uint64_t value6, uint32_t is_last);
- extern "C" __device__ uint64_t __ockl_fprintf_append_string_n(uint64_t msg_desc, const char* data,
- uint64_t length, uint32_t is_last);
- // Introduce local address space
- // `__local` expands to the address_space(3) qualifier.
- #define __local __attribute__((address_space(3)))
- #ifdef __HIP_DEVICE_COMPILE__
- // Reinterprets the unsigned integer `x` as a pointer in address space 3.
- // Only available when __HIP_DEVICE_COMPILE__ is defined.
- __device__ inline static __local void* __to_local(unsigned x) { return (__local void*)x; }
- #endif //__HIP_DEVICE_COMPILE__
- // Using hip.amdgcn.bc - sync threads
- // Flag value and flag type for memory-fence arguments.
- #define __CLK_LOCAL_MEM_FENCE 0x01
- typedef unsigned __cl_mem_fence_flags;
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H
- #if !defined(__HIPCC_RTC__)
- #include <hip/amd_detail/amd_hip_common.h>
- #include "host_defines.h"
- #include "math_fwd.h"
- #include <hip/hip_runtime_api.h>
- #include <stddef.h>
- #include <hip/hip_vector_types.h>
- #include <hip/amd_detail/device_library_decls.h>
- #endif // !defined(__HIPCC_RTC__)
- #if defined(__clang__) && defined(__HIP__)
- // HIP-Clang: declare the C-linkage device printf.
- extern "C" __device__ int printf(const char *fmt, ...);
- #else
- // Other compilers: a variadic template stub that accepts any arguments and
- // does nothing, so device code that calls printf still compiles.
- template <typename... All>
- static inline __device__ void printf(const char* format, All... all) {}
- #endif // defined(__clang__) && defined(__HIP__)
- // Declaration only; returns a 64-bit counter value.
- // NOTE(review): presumably a steady (constant-rate) clock counter - confirm.
- extern "C" __device__ unsigned long long __ockl_steadyctr_u64();
- /*
- Integer Intrinsics
- */
- // Integer intrinsic functions: __popc, __clz, __ffs, __brev
- // Number of set bits in a 32-bit value.
- __device__ static inline unsigned int __popc(unsigned int input) {
-     const int set_bits = __builtin_popcount(input);
-     return set_bits;
- }
- // Number of set bits in a 64-bit value.
- __device__ static inline unsigned int __popcll(unsigned long long int input) {
-     const int set_bits = __builtin_popcountll(input);
-     return set_bits;
- }
- // Leading-zero count of a 32-bit value; the argument is reinterpreted as
- // unsigned and forwarded to __ockl_clz_u32.
- __device__ static inline int __clz(int input) {
-     const uint as_unsigned = (uint)input;
-     return __ockl_clz_u32(as_unsigned);
- }
- // Leading-zero count of a 64-bit value; the argument is reinterpreted as
- // unsigned and forwarded to __ockl_clz_u64.
- __device__ static inline int __clzll(long long int input) {
-     const uint64_t as_unsigned = (uint64_t)input;
-     return __ockl_clz_u64(as_unsigned);
- }
- // 1-based index of the least significant set bit; 0 when no bit is set.
- __device__ static inline unsigned int __ffs(unsigned int input) {
-     if (input == 0) {
-         return 0;
-     }
-     return __builtin_ctz(input) + 1;
- }
- // 64-bit variant: 1-based index of the least significant set bit; 0 for zero.
- __device__ static inline unsigned int __ffsll(unsigned long long int input) {
-     if (input == 0) {
-         return 0;
-     }
-     return __builtin_ctzll(input) + 1;
- }
- // Signed overload; the value is examined by its bit pattern.
- __device__ static inline unsigned int __ffs(int input) {
-     if (input == 0) {
-         return 0;
-     }
-     return __builtin_ctz(input) + 1;
- }
- // Signed 64-bit overload; the value is examined by its bit pattern.
- __device__ static inline unsigned int __ffsll(long long int input) {
-     if (input == 0) {
-         return 0;
-     }
-     return __builtin_ctzll(input) + 1;
- }
- // Given a 32/64-bit value exec mask and an integer value base (between 0 and WAVEFRONT_SIZE),
- // find the n-th (given by offset) set bit in the exec mask from the base bit, and return the bit position.
- // If not found, return -1.
- // Find-n-th-set-bit over a 64-bit mask.
- //   mask   - bit mask to search.
- //   base   - bit position the search starts from (inclusive); must be < 64,
- //            since the shifts below are undefined for larger counts.
- //   offset - > 0: the offset-th set bit at or above `base`;
- //            < 0: the (-offset)-th set bit at or below `base`;
- //            = 0: tests bit `base` itself.
- // Returns the bit position found, or -1 when there is no such bit.
- __device__ static int32_t __fns64(uint64_t mask, uint32_t base, int32_t offset) {
-     uint64_t temp_mask = mask;
-     int32_t temp_offset = offset;
-     if (offset == 0) {
-         // The single-bit mask must be built in 64 bits: `1 << base` is an int
-         // shift and is wrong (or undefined) once base reaches 31.
-         temp_mask &= (1ULL << base);
-         temp_offset = 1;
-     }
-     else if (offset < 0) {
-         // Searching downward is done by mirroring the mask and searching upward.
-         temp_mask = __builtin_bitreverse64(mask);
-         base = 63 - base;
-         temp_offset = -offset;
-     }
-     // Discard every bit below the starting position.
-     temp_mask = temp_mask & ((~0ULL) << base);
-     if (__builtin_popcountll(temp_mask) < temp_offset)
-         return -1;
-     // Binary search on the population count of the low half (32, 16, ..., 1 bits).
-     int32_t total = 0;
-     for (int i = 0x20; i > 0; i >>= 1) {
-         uint64_t temp_mask_lo = temp_mask & ((1ULL << i) - 1);
-         int32_t pcnt = __builtin_popcountll(temp_mask_lo);
-         if (pcnt < temp_offset) {
-             // Target lies in the upper half: skip the low half and its set bits.
-             temp_mask = temp_mask >> i;
-             temp_offset -= pcnt;
-             total += i;
-         }
-         else {
-             temp_mask = temp_mask_lo;
-         }
-     }
-     // Undo the mirroring for downward searches.
-     if (offset < 0)
-         return 63 - total;
-     else
-         return total;
- }
- // Find-n-th-set-bit, 32-bit entry point. The mask is still carried in a
- // 64-bit parameter and is searched as a 64-bit value.
- //   mask   - bit mask to search.
- //   base   - bit position the search starts from (inclusive); must be < 64.
- //   offset - > 0: the offset-th set bit at or above `base`;
- //            < 0: the (-offset)-th set bit at or below `base`;
- //            = 0: tests bit `base` itself.
- // Returns the bit position found, or -1 when there is no such bit.
- __device__ static int32_t __fns32(uint64_t mask, uint32_t base, int32_t offset) {
-     uint64_t temp_mask = mask;
-     int32_t temp_offset = offset;
-     if (offset == 0) {
-         // Build the single-bit mask in 64 bits: `1 << base` is an int shift, so
-         // base == 31 sign-extends into the upper word and larger bases are undefined.
-         temp_mask &= (1ULL << base);
-         temp_offset = 1;
-     }
-     else if (offset < 0) {
-         // Searching downward is done by mirroring the mask and searching upward.
-         temp_mask = __builtin_bitreverse64(mask);
-         base = 63 - base;
-         temp_offset = -offset;
-     }
-     // Discard every bit below the starting position.
-     temp_mask = temp_mask & ((~0ULL) << base);
-     if (__builtin_popcountll(temp_mask) < temp_offset)
-         return -1;
-     // Binary search on the population count of the low half (32, 16, ..., 1 bits).
-     int32_t total = 0;
-     for (int i = 0x20; i > 0; i >>= 1) {
-         uint64_t temp_mask_lo = temp_mask & ((1ULL << i) - 1);
-         int32_t pcnt = __builtin_popcountll(temp_mask_lo);
-         if (pcnt < temp_offset) {
-             // Target lies in the upper half: skip the low half and its set bits.
-             temp_mask = temp_mask >> i;
-             temp_offset -= pcnt;
-             total += i;
-         }
-         else {
-             temp_mask = temp_mask_lo;
-         }
-     }
-     // Undo the mirroring for downward searches.
-     if (offset < 0)
-         return 63 - total;
-     else
-         return total;
- }
- // Reverses the bit order of a 32-bit value.
- __device__ static inline unsigned int __brev(unsigned int input) {
-     const unsigned int mirrored = __builtin_bitreverse32(input);
-     return mirrored;
- }
- // Reverses the bit order of a 64-bit value.
- __device__ static inline unsigned long long int __brevll(unsigned long long int input) {
-     const unsigned long long int mirrored = __builtin_bitreverse64(input);
-     return mirrored;
- }
- // Index of the least significant set bit of a 64-bit value.
- // Returns (unsigned)-1, i.e. 0xFFFFFFFF, when `input` is zero.
- __device__ static inline unsigned int __lastbit_u32_u64(uint64_t input) {
-     // Use the `long long` builtin: `__builtin_ctzl` takes an `unsigned long`,
-     // which is only 32 bits wide on LLP64 platforms and would truncate the input.
-     return input == 0 ? -1 : __builtin_ctzll(input);
- }
- // Extracts an unsigned bit field from `src0`.
- //   src1 - field offset; only the low 5 bits are used.
- //   src2 - field width;  only the low 5 bits are used, so a width of 32 acts as 0.
- // Returns the field right-aligned, or 0 when the masked width is 0.
- __device__ static inline unsigned int __bitextract_u32(unsigned int src0, unsigned int src1, unsigned int src2) {
-     uint32_t offset = src1 & 31;
-     uint32_t width = src2 & 31;
-     if (width == 0) {
-         return 0;
-     }
-     // Shift-then-mask instead of shifting left by (32 - offset - width): that
-     // count underflows, and the shift is undefined, when offset + width > 32.
-     // In that case the field is simply cut off at bit 31.
-     return (src0 >> offset) & ((1u << width) - 1u);
- }
- // Extracts an unsigned bit field from the 64-bit `src0`.
- //   src1 - field offset; only the low 6 bits are used.
- //   src2 - field width;  only the low 6 bits are used, so a width of 64 acts as 0.
- // Returns the field right-aligned, or 0 when the masked width is 0.
- __device__ static inline uint64_t __bitextract_u64(uint64_t src0, unsigned int src1, unsigned int src2) {
-     uint64_t offset = src1 & 63;
-     uint64_t width = src2 & 63;
-     if (width == 0) {
-         return 0;
-     }
-     // Shift-then-mask instead of shifting left by (64 - offset - width): that
-     // count underflows, and the shift is undefined, when offset + width > 64.
-     // In that case the field is simply cut off at bit 63.
-     return (src0 >> offset) & ((1ULL << width) - 1ULL);
- }
- // Replaces a bit field of `src0` with the low bits of `src1`.
- //   src2 - field offset; only the low 5 bits are used.
- //   src3 - field width;  only the low 5 bits are used.
- // Returns `src0` with bits [offset, offset + width) taken from `src1`.
- __device__ static inline unsigned int __bitinsert_u32(unsigned int src0, unsigned int src1, unsigned int src2, unsigned int src3) {
-     uint32_t offset = src2 & 31;
-     uint32_t width = src3 & 31;
-     // Build the mask in unsigned arithmetic: `(1 << 31) - 1` overflows a signed int.
-     uint32_t mask = (1u << width) - 1u;
-     return ((src0 & ~(mask << offset)) | ((src1 & mask) << offset));
- }
- // Replaces a bit field of the 64-bit `src0` with the low bits of `src1`.
- // Offset (src2) and width (src3) are each reduced to their low 6 bits.
- __device__ static inline uint64_t __bitinsert_u64(uint64_t src0, uint64_t src1, unsigned int src2, unsigned int src3) {
-     const uint64_t pos = src2 & 63;
-     const uint64_t len = src3 & 63;
-     // Field mask already moved to its final position.
-     const uint64_t field = ((1ULL << len) - 1) << pos;
-     const uint64_t kept = src0 & ~field;
-     const uint64_t placed = (src1 << pos) & field;
-     return kept | placed;
- }
- // Funnel shift left; the shift amount is reduced to its low 5 bits.
- // Returns `hi` unchanged when the reduced amount is 0.
- __device__ inline unsigned int __funnelshift_l(unsigned int lo, unsigned int hi, unsigned int shift)
- {
-     const uint32_t amount = shift & 31;
-     if (amount == 0) {
-         return hi;
-     }
-     return __builtin_amdgcn_alignbit(hi, lo, 32 - amount);
- }
- // Funnel shift left; the shift amount is clamped to at most 32.
- // Returns `hi` unchanged when the clamped amount is 0.
- __device__ inline unsigned int __funnelshift_lc(unsigned int lo, unsigned int hi, unsigned int shift)
- {
-     const uint32_t amount = shift < 32 ? shift : 32;
-     if (amount == 0) {
-         return hi;
-     }
-     return __builtin_amdgcn_alignbit(hi, lo, 32 - amount);
- }
- // Funnel shift right; the amount is passed straight to the alignbit builtin.
- __device__ inline unsigned int __funnelshift_r(unsigned int lo, unsigned int hi, unsigned int shift)
- {
-     const unsigned int shifted = __builtin_amdgcn_alignbit(hi, lo, shift);
-     return shifted;
- }
- // Funnel shift right; any amount of 32 or more yields `hi`.
- __device__ inline unsigned int __funnelshift_rc(unsigned int lo, unsigned int hi, unsigned int shift)
- {
-     if (shift >= 32) {
-         return hi;
-     }
-     return __builtin_amdgcn_alignbit(hi, lo, shift);
- }
- // Forward declarations of the integer helpers defined further down in this header.
- // NOTE(review): `__hadd` is declared as returning unsigned int while `__rhadd`
- // returns int, and `__umul24` returns int although both operands are unsigned -
- // confirm these asymmetries are intended before changing any signature.
- __device__ static unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s);
- __device__ static unsigned int __hadd(int x, int y);
- __device__ static int __mul24(int x, int y);
- __device__ static long long int __mul64hi(long long int x, long long int y);
- __device__ static int __mulhi(int x, int y);
- __device__ static int __rhadd(int x, int y);
- __device__ static unsigned int __sad(int x, int y,unsigned int z);
- __device__ static unsigned int __uhadd(unsigned int x, unsigned int y);
- __device__ static int __umul24(unsigned int x, unsigned int y);
- __device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y);
- __device__ static unsigned int __umulhi(unsigned int x, unsigned int y);
- __device__ static unsigned int __urhadd(unsigned int x, unsigned int y);
- __device__ static unsigned int __usad(unsigned int x, unsigned int y, unsigned int z);
- // Overlays one 32-bit unsigned value (`ui`) with its four bytes (`c`), so the
- // same storage can be read either way. 4-byte aligned.
- // Which byte of `ui` appears at c[0] depends on the target's byte order.
- struct ucharHolder {
- union {
- unsigned char c[4];
- unsigned int ui;
- };
- } __attribute__((aligned(4)));
- // Overlays two 32-bit unsigned values (`ui[0]`, `ui[1]`) with their eight
- // bytes (`c`). 8-byte aligned.
- // Which byte of each word appears first in `c` depends on the target's byte order.
- struct uchar2Holder {
- union {
- unsigned int ui[2];
- unsigned char c[8];
- };
- } __attribute__((aligned(8)));
- // Builds a 32-bit value from four bytes picked out of the eight bytes of
- // {x, y}. Each of the four low nibbles of `s` selects one source byte; only
- // the low three bits of a nibble are used. Nibble k supplies result byte k.
- __device__
- static inline unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) {
-     struct uchar2Holder source;
-     source.ui[0] = x;
-     source.ui[1] = y;
-     struct ucharHolder selector;
-     selector.ui = s;
-     unsigned int result = 0;
-     for (int lane = 0; lane < 4; ++lane) {
-         // Two selector nibbles live in each selector byte: low nibble first.
-         const unsigned char key = selector.c[lane >> 1];
-         const unsigned int index = (lane & 1) ? ((key & 0x70) >> 4) : (key & 0x07);
-         result += static_cast<unsigned int>(source.c[index]) << (8 * lane);
-     }
-     return result;
- }
- // Signed halving add: floor((x + y) / 2), computed without overflowing the
- // intermediate sum. The return type stays unsigned int to match the existing
- // declaration; negative averages come back as their two's-complement bit
- // pattern, so cast the result to int to read them.
- //
- // The previous body returned `(value >> 1) || sign`, a logical OR that can
- // only yield 0 or 1; it also used the sign mask 0x8000000 (one hex digit
- // short) and overflowed on `x + y`.
- __device__ static inline unsigned int __hadd(int x, int y) {
-     // Bits common to both operands contribute fully; differing bits contribute
-     // half. The arithmetic right shift keeps the rounding toward -infinity.
-     const int average = (x & y) + ((x ^ y) >> 1);
-     return static_cast<unsigned int>(average);
- }
- // Signed 24-bit multiply; forwards to __ockl_mul24_i32.
- __device__ static inline int __mul24(int x, int y) {
-     const int product = __ockl_mul24_i32(x, y);
-     return product;
- }
- // High 64 bits of the signed 128-bit product x * y.
- //
- // Uses schoolbook multiplication on 32-bit halves. All temporaries are
- // explicitly 64 bits wide (`long long`); the previous `long` / `ulong`
- // temporaries are only 32 bits on LLP64 platforms, where the partial
- // products would be truncated.
- __device__ static inline long long __mul64hi(long long int x, long long int y) {
-     typedef unsigned long long u64;
-     typedef long long i64;
-     // Split each operand into an unsigned low half and a signed high half.
-     u64 x0 = (u64)x & 0xffffffffULL;
-     i64 x1 = x >> 32;
-     u64 y0 = (u64)y & 0xffffffffULL;
-     i64 y1 = y >> 32;
-     u64 z0 = x0*y0;
-     // Mixed signed/unsigned products are evaluated modulo 2^64; the true value
-     // fits in a signed 64-bit integer, so converting back recovers it.
-     i64 t = (i64)((u64)x1*y0 + (z0 >> 32));
-     i64 z1 = t & 0xffffffffLL;
-     i64 z2 = t >> 32;
-     z1 = (i64)(x0*(u64)y1 + (u64)z1);
-     return x1*y1 + z2 + (z1 >> 32);
- }
- // High half of a signed 32 x 32 multiply; forwards to __ockl_mul_hi_i32.
- __device__ static inline int __mulhi(int x, int y) {
-     const int upper = __ockl_mul_hi_i32(x, y);
-     return upper;
- }
- // Signed rounded halving add: floor((x + y + 1) / 2), computed without
- // overflowing the intermediate sum.
- //
- // The previous body returned `(value >> 1) || sign`, a logical OR that can
- // only yield 0 or 1; it also used the sign mask 0x8000000 (one hex digit
- // short) and overflowed on `x + y + 1`.
- __device__ static inline int __rhadd(int x, int y) {
-     // (x | y) over-counts the differing bits by half of them; subtract that
-     // half back. The arithmetic right shift gives the round-up behaviour.
-     return (x | y) - ((x ^ y) >> 1);
- }
- // Sum of absolute difference: |x - y| + z.
- // The difference is formed in unsigned arithmetic so that operands far apart
- // (e.g. INT_MAX and INT_MIN) do not overflow a signed subtraction. The result
- // wraps modulo 2^32, as the original unsigned addition of `z` did.
- __device__ static inline unsigned int __sad(int x, int y, unsigned int z) {
-     const unsigned int ux = static_cast<unsigned int>(x);
-     const unsigned int uy = static_cast<unsigned int>(y);
-     const unsigned int distance = x > y ? ux - uy : uy - ux;
-     return distance + z;
- }
- // Unsigned halving add: floor((x + y) / 2) without losing the carry.
- // `(x + y) >> 1` wraps when the sum exceeds 32 bits (e.g. both operands
- // 0xFFFFFFFF gave 0x7FFFFFFF); this form never needs the 33rd bit.
- __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) {
-     // Common bits contribute fully, differing bits contribute half.
-     return (x & y) + ((x ^ y) >> 1);
- }
- // Unsigned 24-bit multiply; forwards to __ockl_mul24_u32. The unsigned result
- // is converted to the declared int return type.
- __device__ static inline int __umul24(unsigned int x, unsigned int y) {
-     const uint product = __ockl_mul24_u32(x, y);
-     return product;
- }
- // High 64 bits of the unsigned 128-bit product x * y.
- //
- // Uses schoolbook multiplication on 32-bit halves. All temporaries are
- // `unsigned long long`; the previous `ulong` (unsigned long) temporaries are
- // only 32 bits on LLP64 platforms, where the partial products would be
- // truncated.
- __device__
- static inline unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) {
-     typedef unsigned long long u64;
-     u64 x0 = x & 0xffffffffULL;
-     u64 x1 = x >> 32;
-     u64 y0 = y & 0xffffffffULL;
-     u64 y1 = y >> 32;
-     u64 z0 = x0*y0;
-     // Neither partial sum below can exceed 2^64 - 1, so no carry is lost.
-     u64 t = x1*y0 + (z0 >> 32);
-     u64 z1 = t & 0xffffffffULL;
-     u64 z2 = t >> 32;
-     z1 = x0*y1 + z1;
-     return x1*y1 + z2 + (z1 >> 32);
- }
- // High half of an unsigned 32 x 32 multiply; forwards to __ockl_mul_hi_u32.
- __device__ static inline unsigned int __umulhi(unsigned int x, unsigned int y) {
-     const unsigned int upper = __ockl_mul_hi_u32(x, y);
-     return upper;
- }
- // Unsigned rounded halving add: floor((x + y + 1) / 2) without losing the carry.
- // `(x + y + 1) >> 1` wraps when the sum exceeds 32 bits (e.g. 0xFFFFFFFF and 0
- // gave 0 instead of 0x80000000); this form never needs the 33rd bit.
- __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) {
-     // (x | y) over-counts the differing bits by half of them (rounded down);
-     // subtracting that half leaves the rounded-up average.
-     return (x | y) - ((x ^ y) >> 1);
- }
- // Unsigned sum-of-absolute-difference; forwards all three operands to
- // __ockl_sadd_u32.
- __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) {
-     const unsigned int accumulated = __ockl_sadd_u32(x, y, z);
-     return accumulated;
- }
- // Computes the lane id by chaining the two mbcnt builtins with an all-ones
- // mask: the low-half count feeds the high-half count as its accumulator.
- __device__ static inline unsigned int __lane_id() {
-     const unsigned int low_count = __builtin_amdgcn_mbcnt_lo(-1, 0);
-     return __builtin_amdgcn_mbcnt_hi(-1, low_count);
- }
- // Thin wrapper over __builtin_amdgcn_mbcnt_lo.
- __device__
- static inline unsigned int __mbcnt_lo(unsigned int x, unsigned int y) {
-     const unsigned int count = __builtin_amdgcn_mbcnt_lo(x, y);
-     return count;
- }
- // Thin wrapper over __builtin_amdgcn_mbcnt_hi.
- __device__
- static inline unsigned int __mbcnt_hi(unsigned int x, unsigned int y) {
-     const unsigned int count = __builtin_amdgcn_mbcnt_hi(x, y);
-     return count;
- }
- /*
- HIP specific device functions
- */
- #if !defined(__HIPCC_RTC__)
- #include "amd_warp_functions.h"
- #endif
- // Byte-lane masks for a 32-bit word: MASK1 selects bytes 0 and 2, MASK2
- // selects bytes 1 and 3.
- #define MASK1 0x00ff00ff
- #define MASK2 0xff00ff00
- // Adds `in1.w` and `in2.w` in two passes, one per mask, masking each partial
- // sum so that a carry out of one selected byte cannot reach the next selected
- // byte. The two partial results are OR-ed together into `out.w`.
- // NOTE(review): only the `.w` member is read and written here; `out.x`,
- // `out.y` and `out.z` are never assigned in this function. Confirm that `.w`
- // is meant to carry a packed 32-bit value for the `char4` type in use.
- __device__ static inline char4 __hip_hc_add8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 + one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 + one2) & MASK2);
- return out;
- }
- // Subtracts `in2.w` from `in1.w` in two passes, one per byte-lane mask
- // (MASK1, then MASK2), masking each partial difference before OR-ing the two
- // results into `out.w`.
- // NOTE(review): only the `.w` member is read and written here; `out.x`,
- // `out.y` and `out.z` are never assigned in this function. Confirm that `.w`
- // is meant to carry a packed 32-bit value for the `char4` type in use.
- __device__ static inline char4 __hip_hc_sub8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 - one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 - one2) & MASK2);
- return out;
- }
- // Multiplies `in1.w` by `in2.w` in two passes, one per byte-lane mask
- // (MASK1, then MASK2), masking each partial product before OR-ing the two
- // results into `out.w`.
- // NOTE(review): the operands of each multiply are whole masked words, not
- // individual bytes, so this is not obviously a per-byte product - confirm the
- // intended semantics. As in the add/sub variants, only `.w` is read and
- // written; `out.x`, `out.y` and `out.z` are never assigned in this function.
- __device__ static inline char4 __hip_hc_mul8pk(char4 in1, char4 in2) {
- char4 out;
- unsigned one1 = in1.w & MASK1;
- unsigned one2 = in2.w & MASK1;
- out.w = (one1 * one2) & MASK1;
- one1 = in1.w & MASK2;
- one2 = in2.w & MASK2;
- out.w = out.w | ((one1 * one2) & MASK2);
- return out;
- }
- // double -> float, _rd variant: delegates to __ocml_cvtrtn_f32_f64.
- __device__ static inline float __double2float_rd(double x) {
-     const float narrowed = __ocml_cvtrtn_f32_f64(x);
-     return narrowed;
- }
- // double -> float, _rn variant: the language's own narrowing conversion.
- __device__ static inline float __double2float_rn(double x) {
-     return static_cast<float>(x);
- }
- // double -> float, _ru variant: delegates to __ocml_cvtrtp_f32_f64.
- __device__ static inline float __double2float_ru(double x) {
-     const float narrowed = __ocml_cvtrtp_f32_f64(x);
-     return narrowed;
- }
- // double -> float, _rz variant: delegates to __ocml_cvtrtz_f32_f64.
- __device__ static inline float __double2float_rz(double x) {
-     const float narrowed = __ocml_cvtrtz_f32_f64(x);
-     return narrowed;
- }
- // Returns the second int-sized word of the double's object representation
- // (the bytes at offset sizeof(int)).
- __device__ static inline int __double2hiint(double x) {
-     static_assert(sizeof(double) == 2 * sizeof(int), "");
-     int upper_word;
-     __builtin_memcpy(&upper_word, reinterpret_cast<const char*>(&x) + sizeof(int), sizeof(upper_word));
-     return upper_word;
- }
- // Returns the first int-sized word of the double's object representation
- // (the bytes at offset 0).
- __device__ static inline int __double2loint(double x) {
-     static_assert(sizeof(double) == 2 * sizeof(int), "");
-     int lower_word;
-     __builtin_memcpy(&lower_word, reinterpret_cast<const char*>(&x), sizeof(lower_word));
-     return lower_word;
- }
- // double -> integer conversions. In every family the _rd / _rn / _ru variants
- // first round with __ocml_floor_f64 / __ocml_rint_f64 / __ocml_ceil_f64 and
- // then convert; the _rz variant converts the value directly.
- //
- // double -> int
- __device__ static inline int __double2int_rd(double x) {
-     return static_cast<int>(__ocml_floor_f64(x));
- }
- __device__ static inline int __double2int_rn(double x) {
-     return static_cast<int>(__ocml_rint_f64(x));
- }
- __device__ static inline int __double2int_ru(double x) {
-     return static_cast<int>(__ocml_ceil_f64(x));
- }
- __device__ static inline int __double2int_rz(double x) {
-     return static_cast<int>(x);
- }
- // double -> long long
- __device__ static inline long long int __double2ll_rd(double x) {
-     const double rounded = __ocml_floor_f64(x);
-     return static_cast<long long>(rounded);
- }
- __device__ static inline long long int __double2ll_rn(double x) {
-     const double rounded = __ocml_rint_f64(x);
-     return static_cast<long long>(rounded);
- }
- __device__ static inline long long int __double2ll_ru(double x) {
-     const double rounded = __ocml_ceil_f64(x);
-     return static_cast<long long>(rounded);
- }
- __device__ static inline long long int __double2ll_rz(double x) {
-     return static_cast<long long>(x);
- }
- // double -> unsigned int
- __device__ static inline unsigned int __double2uint_rd(double x) {
-     const double rounded = __ocml_floor_f64(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __double2uint_rn(double x) {
-     const double rounded = __ocml_rint_f64(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __double2uint_ru(double x) {
-     const double rounded = __ocml_ceil_f64(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __double2uint_rz(double x) {
-     return static_cast<unsigned int>(x);
- }
- // double -> unsigned long long
- __device__ static inline unsigned long long int __double2ull_rd(double x) {
-     const double rounded = __ocml_floor_f64(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __double2ull_rn(double x) {
-     const double rounded = __ocml_rint_f64(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __double2ull_ru(double x) {
-     const double rounded = __ocml_ceil_f64(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __double2ull_rz(double x) {
-     return static_cast<unsigned long long int>(x);
- }
- // Reinterprets the bits of a double as a long long (bytewise copy, no
- // numeric conversion).
- __device__ static inline long long int __double_as_longlong(double x) {
-     static_assert(sizeof(long long) == sizeof(double), "");
-     long long bits = 0;
-     __builtin_memcpy(&bits, &x, sizeof(long long));
-     return bits;
- }
- /*
- __device__ unsigned short __float2half_rn(float x);
- __device__ float __half2float(unsigned short);
- The device functions above are not valid in HIP.
- Use the following instead:
- __device__ __half __float2half_rn(float x);
- __device__ float __half2float(__half);
- from hip_fp16.h.
- CUDA implements half as unsigned short, whereas HIP does not.
- */
- // float -> integer conversions. The _rd / _rn / _ru variants first round with
- // __ocml_floor_f32 / __ocml_rint_f32 / __ocml_ceil_f32 and then convert.
- // __float2int_rz rounds with __ocml_trunc_f32 first; the other _rz variants
- // convert the value directly.
- //
- // float -> int
- __device__ static inline int __float2int_rd(float x) {
-     return static_cast<int>(__ocml_floor_f32(x));
- }
- __device__ static inline int __float2int_rn(float x) {
-     return static_cast<int>(__ocml_rint_f32(x));
- }
- __device__ static inline int __float2int_ru(float x) {
-     return static_cast<int>(__ocml_ceil_f32(x));
- }
- __device__ static inline int __float2int_rz(float x) {
-     return static_cast<int>(__ocml_trunc_f32(x));
- }
- // float -> long long
- __device__ static inline long long int __float2ll_rd(float x) {
-     const float rounded = __ocml_floor_f32(x);
-     return static_cast<long long int>(rounded);
- }
- __device__ static inline long long int __float2ll_rn(float x) {
-     const float rounded = __ocml_rint_f32(x);
-     return static_cast<long long int>(rounded);
- }
- __device__ static inline long long int __float2ll_ru(float x) {
-     const float rounded = __ocml_ceil_f32(x);
-     return static_cast<long long int>(rounded);
- }
- __device__ static inline long long int __float2ll_rz(float x) {
-     return static_cast<long long int>(x);
- }
- // float -> unsigned int
- __device__ static inline unsigned int __float2uint_rd(float x) {
-     const float rounded = __ocml_floor_f32(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __float2uint_rn(float x) {
-     const float rounded = __ocml_rint_f32(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __float2uint_ru(float x) {
-     const float rounded = __ocml_ceil_f32(x);
-     return static_cast<unsigned int>(rounded);
- }
- __device__ static inline unsigned int __float2uint_rz(float x) {
-     return static_cast<unsigned int>(x);
- }
- // float -> unsigned long long
- __device__ static inline unsigned long long int __float2ull_rd(float x) {
-     const float rounded = __ocml_floor_f32(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __float2ull_rn(float x) {
-     const float rounded = __ocml_rint_f32(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __float2ull_ru(float x) {
-     const float rounded = __ocml_ceil_f32(x);
-     return static_cast<unsigned long long int>(rounded);
- }
- __device__ static inline unsigned long long int __float2ull_rz(float x) {
-     return static_cast<unsigned long long int>(x);
- }
- // Reinterprets the bits of a float as an int (bytewise copy, no numeric
- // conversion).
- __device__ static inline int __float_as_int(float x) {
-     static_assert(sizeof(int) == sizeof(float), "");
-     int bits = 0;
-     __builtin_memcpy(&bits, &x, sizeof(int));
-     return bits;
- }
- // Reinterprets the bits of a float as an unsigned int (bytewise copy, no
- // numeric conversion).
- __device__ static inline unsigned int __float_as_uint(float x) {
-     static_assert(sizeof(unsigned int) == sizeof(float), "");
-     unsigned int bits = 0;
-     __builtin_memcpy(&bits, &x, sizeof(unsigned int));
-     return bits;
- }
- // Assembles a double from two 32-bit words: `hi` supplies bits 63..32 and
- // `lo` supplies bits 31..0 of the 64-bit pattern, which is then copied
- // bytewise into a double.
- __device__ static inline double __hiloint2double(int hi, int lo) {
-     static_assert(sizeof(double) == sizeof(uint64_t), "");
-     const uint64_t upper = static_cast<uint64_t>(static_cast<uint32_t>(hi)) << 32;
-     const uint64_t lower = static_cast<uint32_t>(lo);
-     const uint64_t pattern = upper | lower;
-     double value;
-     __builtin_memcpy(&value, &pattern, sizeof(pattern));
-     return value;
- }
- // int -> double: the language's own conversion.
- __device__ static inline double __int2double_rn(int x) {
-     return static_cast<double>(x);
- }
- // int -> float, _rd variant: delegates to __ocml_cvtrtn_f32_s32.
- __device__ static inline float __int2float_rd(int x) {
-     const float converted = __ocml_cvtrtn_f32_s32(x);
-     return converted;
- }
- // int -> float, _rn variant: the language's own conversion.
- __device__ static inline float __int2float_rn(int x) {
-     return static_cast<float>(x);
- }
- // int -> float, _ru variant: delegates to __ocml_cvtrtp_f32_s32.
- __device__ static inline float __int2float_ru(int x) {
-     const float converted = __ocml_cvtrtp_f32_s32(x);
-     return converted;
- }
- // int -> float, _rz variant: delegates to __ocml_cvtrtz_f32_s32.
- __device__ static inline float __int2float_rz(int x) {
-     const float converted = __ocml_cvtrtz_f32_s32(x);
-     return converted;
- }
- // Reinterprets the bits of an int as a float (bytewise copy, no numeric
- // conversion).
- __device__ static inline float __int_as_float(int x) {
-     static_assert(sizeof(float) == sizeof(int), "");
-     float value = 0;
-     __builtin_memcpy(&value, &x, sizeof(float));
-     return value;
- }
- __device__ static inline double __ll2double_rd(long long int x) {
- return __ocml_cvtrtn_f64_s64(x);
- }
- __device__ static inline double __ll2double_rn(long long int x) { return (double)x; }
- __device__ static inline double __ll2double_ru(long long int x) {
- return __ocml_cvtrtp_f64_s64(x);
- }
- __device__ static inline double __ll2double_rz(long long int x) {
- return __ocml_cvtrtz_f64_s64(x);
- }
- __device__ static inline float __ll2float_rd(long long int x) {
- return __ocml_cvtrtn_f32_s64(x);
- }
- __device__ static inline float __ll2float_rn(long long int x) { return (float)x; }
- // long long -> float through __ocml_cvtrtp_f32_s64.
- // NOTE(review): the ru/rtp naming suggests rounding toward positive infinity - confirm in the OCML docs.
- __device__ static inline float __ll2float_ru(long long int x) {
-   const float converted = __ocml_cvtrtp_f32_s64(x);
-   return converted;
- }
- // long long -> float through __ocml_cvtrtz_f32_s64.
- // NOTE(review): the rz/rtz naming suggests rounding toward zero - confirm in the OCML docs.
- __device__ static inline float __ll2float_rz(long long int x) {
-   const float converted = __ocml_cvtrtz_f32_s64(x);
-   return converted;
- }
- // Returns the bytes of the long long `x` reinterpreted as a double (byte copy, no numeric conversion).
- __device__ static inline double __longlong_as_double(long long int x) {
-   static_assert(sizeof(long long) == sizeof(double), "");
-   double value;
-   __builtin_memcpy(&value, &x, sizeof(value));
-   return value;
- }
- // unsigned int -> double using the language's built-in conversion.
- __device__ static inline double __uint2double_rn(unsigned int x) {
-   return static_cast<double>(x);
- }
- // unsigned int -> float through __ocml_cvtrtn_f32_u32.
- // NOTE(review): the rd/rtn naming suggests rounding toward negative infinity - confirm in the OCML docs.
- __device__ static inline float __uint2float_rd(unsigned int x) {
-   const float converted = __ocml_cvtrtn_f32_u32(x);
-   return converted;
- }
- // unsigned int -> float using the language's built-in conversion.
- // NOTE(review): the rn name suggests round-to-nearest; this relies on the default conversion rounding - confirm.
- __device__ static inline float __uint2float_rn(unsigned int x) {
-   return static_cast<float>(x);
- }
- // unsigned int -> float through __ocml_cvtrtp_f32_u32.
- // NOTE(review): the ru/rtp naming suggests rounding toward positive infinity - confirm in the OCML docs.
- __device__ static inline float __uint2float_ru(unsigned int x) {
-   const float converted = __ocml_cvtrtp_f32_u32(x);
-   return converted;
- }
- // unsigned int -> float through __ocml_cvtrtz_f32_u32.
- // NOTE(review): the rz/rtz naming suggests rounding toward zero - confirm in the OCML docs.
- __device__ static inline float __uint2float_rz(unsigned int x) {
-   const float converted = __ocml_cvtrtz_f32_u32(x);
-   return converted;
- }
- // Returns the bytes of the unsigned int `x` reinterpreted as a float (byte copy, no numeric conversion).
- __device__ static inline float __uint_as_float(unsigned int x) {
-   static_assert(sizeof(unsigned int) == sizeof(float), "");
-   float value;
-   __builtin_memcpy(&value, &x, sizeof(value));
-   return value;
- }
- // unsigned long long -> double through __ocml_cvtrtn_f64_u64.
- // NOTE(review): the rd/rtn naming suggests rounding toward negative infinity - confirm in the OCML docs.
- __device__ static inline double __ull2double_rd(unsigned long long int x) {
-   const double converted = __ocml_cvtrtn_f64_u64(x);
-   return converted;
- }
- // unsigned long long -> double using the language's built-in conversion.
- // NOTE(review): the rn name suggests round-to-nearest; this relies on the default conversion rounding - confirm.
- __device__ static inline double __ull2double_rn(unsigned long long int x) {
-   return static_cast<double>(x);
- }
- // unsigned long long -> double through __ocml_cvtrtp_f64_u64.
- // NOTE(review): the ru/rtp naming suggests rounding toward positive infinity - confirm in the OCML docs.
- __device__ static inline double __ull2double_ru(unsigned long long int x) {
-   const double converted = __ocml_cvtrtp_f64_u64(x);
-   return converted;
- }
- // unsigned long long -> double through __ocml_cvtrtz_f64_u64.
- // NOTE(review): the rz/rtz naming suggests rounding toward zero - confirm in the OCML docs.
- __device__ static inline double __ull2double_rz(unsigned long long int x) {
-   const double converted = __ocml_cvtrtz_f64_u64(x);
-   return converted;
- }
- // unsigned long long -> float through __ocml_cvtrtn_f32_u64.
- // NOTE(review): the rd/rtn naming suggests rounding toward negative infinity - confirm in the OCML docs.
- __device__ static inline float __ull2float_rd(unsigned long long int x) {
-   const float converted = __ocml_cvtrtn_f32_u64(x);
-   return converted;
- }
- // unsigned long long -> float using the language's built-in conversion.
- // NOTE(review): the rn name suggests round-to-nearest; this relies on the default conversion rounding - confirm.
- __device__ static inline float __ull2float_rn(unsigned long long int x) {
-   return static_cast<float>(x);
- }
- // unsigned long long -> float through __ocml_cvtrtp_f32_u64.
- // NOTE(review): the ru/rtp naming suggests rounding toward positive infinity - confirm in the OCML docs.
- __device__ static inline float __ull2float_ru(unsigned long long int x) {
-   const float converted = __ocml_cvtrtp_f32_u64(x);
-   return converted;
- }
- // unsigned long long -> float through __ocml_cvtrtz_f32_u64.
- // NOTE(review): the rz/rtz naming suggests rounding toward zero - confirm in the OCML docs.
- __device__ static inline float __ull2float_rz(unsigned long long int x) {
-   const float converted = __ocml_cvtrtz_f32_u64(x);
-   return converted;
- }
- #if defined(__clang__) && defined(__HIP__)
- // Clock functions: forward declarations only; the inline definitions follow under __HIP_DEVICE_COMPILE__.
- __device__ long long int __clock64();
- __device__ long long int __clock();
- __device__ long long int clock64();
- __device__ long long int clock();
- __device__ long long int wall_clock64();
- // hip.amdgcn.bc - named sync (forward declaration; defined under __HIP_DEVICE_COMPILE__)
- __device__ void __named_sync();
- #ifdef __HIP_DEVICE_COMPILE__
- // Returns the GPU core cycle count.
- // The GPU can change its core clock frequency at runtime; the maximum frequency can be
- // queried through the hipDeviceAttributeClockRate attribute.
- __device__
- inline __attribute__((always_inline))
- long long int __clock64() {
- #if __has_builtin(__builtin_amdgcn_s_memtime)
-   // s_memtime exists on gfx8, gfx9, gfx10.1, gfx10.2, gfx10.3
-   const auto ticks = __builtin_amdgcn_s_memtime();
- #else
-   // Subject to change when a better solution is available
-   const auto ticks = __builtin_readcyclecounter();
- #endif
-   return static_cast<long long int>(ticks);
- }
- // Forwards to __clock64() and returns its value unchanged.
- __device__
- inline __attribute__((always_inline))
- long long int __clock() {
-   const long long int cycles = __clock64();
-   return cycles;
- }
- // Returns the counter read by __ockl_steadyctr_u64(). Per the original note this is a wall clock
- // count at a constant frequency that can be queried through hipDeviceAttributeWallClockRate.
- __device__
- inline __attribute__((always_inline))
- long long int wall_clock64() {
-   const auto ticks = __ockl_steadyctr_u64();
-   return static_cast<long long int>(ticks);
- }
- // Forwards to __clock64() and returns its value unchanged.
- __device__
- inline __attribute__((always_inline))
- long long int clock64() {
-   const long long int cycles = __clock64();
-   return cycles;
- }
- // Forwards to __clock() and returns its value unchanged.
- __device__
- inline __attribute__((always_inline))
- long long int clock() {
-   const long long int cycles = __clock();
-   return cycles;
- }
- // hip.amdgcn.bc - named sync
- // Issues __builtin_amdgcn_s_barrier(); takes no arguments and returns nothing.
- __device__
- inline
- void __named_sync() {
-   __builtin_amdgcn_s_barrier();
- }
- #endif // __HIP_DEVICE_COMPILE__
- // warp vote function __all __any __ballot
- // __all: forwards `predicate` to __ockl_wfall_i32 and returns its result.
- __device__
- inline
- int __all(int predicate) {
-   const int vote = __ockl_wfall_i32(predicate);
-   return vote;
- }
- // __any: forwards `predicate` to __ockl_wfany_i32 and returns its result.
- __device__
- inline
- int __any(int predicate) {
-   const int vote = __ockl_wfany_i32(predicate);
-   return vote;
- }
- // Comparison code for "not equal"; XXX taken from llvm/include/llvm/IR/InstrTypes.h
- #define ICMP_NE 33
- // __ballot: returns the mask produced by comparing `predicate` != 0 via __builtin_amdgcn_uicmp.
- __device__
- inline
- unsigned long long int __ballot(int predicate) {
-   const unsigned long long int votes = __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-   return votes;
- }
- // __ballot64: same computation as __ballot - the mask of `predicate` != 0 from __builtin_amdgcn_uicmp.
- __device__
- inline
- unsigned long long int __ballot64(int predicate) {
-   const unsigned long long int votes = __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
-   return votes;
- }
- // hip.amdgcn.bc - lanemask
- // Returns the bits of __ballot64(1) that lie strictly above the calling lane's bit position.
- __device__
- inline
- uint64_t __lanemask_gt()
- {
-   const uint32_t lane = __ockl_lane_u32();
-   if (lane == 63) {
-     // Nothing lies above bit 63; returning early also avoids a shift by 64.
-     return 0;
-   }
-   const uint64_t voted = __ballot64(1);
-   const uint64_t above = (~((uint64_t)0)) << (lane + 1);
-   return above & voted;
- }
- // Returns the bits of __ballot64(1) that lie strictly below the calling lane's bit position.
- // The ballot is kept in an unsigned 64-bit value (as in __lanemask_gt) so the bitmask never
- // passes through a signed type.
- __device__
- inline
- uint64_t __lanemask_lt()
- {
-   const uint32_t lane = __ockl_lane_u32();
-   const uint64_t ballot = __ballot64(1);
-   // (1 << lane) - 1 sets exactly the bits below `lane`.
-   const uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
-   return mask & ballot;
- }
- // Returns a 64-bit mask with only the calling lane's bit set.
- // The mask stays unsigned end to end: storing 1 << 63 in a signed 64-bit temporary is
- // implementation-defined before C++20.
- __device__
- inline
- uint64_t __lanemask_eq()
- {
-   const uint32_t lane = __ockl_lane_u32();
-   const uint64_t mask = (uint64_t)1 << lane;
-   return mask;
- }
- // Returns the pointer it is given, unchanged.
- __device__ inline void* __local_to_generic(void* p) {
-   return p;
- }
- #ifdef __HIP_DEVICE_COMPILE__
- // Get group segment base pointer: the static group-segment size reported by
- // __builtin_amdgcn_groupstaticsize() is turned into a pointer with __to_local and then
- // passed through __local_to_generic.
- // NOTE(review): presumably this is where dynamically sized group memory starts - confirm.
- __device__
- inline
- void* __get_dynamicgroupbaseptr()
- {
-   void* local_base = (void*)__to_local(__builtin_amdgcn_groupstaticsize());
-   return (char*)__local_to_generic(local_base);
- }
- #else
- // Declaration only when not compiling device code.
- __device__
- void* __get_dynamicgroupbaseptr();
- #endif // __HIP_DEVICE_COMPILE__
- // Forwards to __get_dynamicgroupbaseptr() and returns its pointer unchanged.
- __device__
- inline
- void *__amdgcn_get_dynamicgroupbaseptr() {
-   void* base = __get_dynamicgroupbaseptr();
-   return base;
- }
- // Memory Fence Functions
- // __threadfence: sequentially consistent fence with the "agent" synchronization scope.
- __device__
- static inline void __threadfence() {
-   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
- }
- // __threadfence_block: sequentially consistent fence with the "workgroup" synchronization scope.
- __device__
- static inline void __threadfence_block() {
-   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
- }
- // __threadfence_system: sequentially consistent fence with the empty ("") synchronization scope.
- // NOTE(review): the empty scope string presumably denotes system scope - confirm in the LLVM AMDGPU docs.
- __device__
- static inline void __threadfence_system() {
-   __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
- }
- // abort: device-side abort implemented as a trap; declared weak.
- __device__
- inline
- __attribute__((weak))
- void abort() {
-   __builtin_trap();
- }
- // The noinline attribute helps encapsulate the printf expansion,
- // which otherwise has a performance impact just by increasing the
- // size of the calling function. Additionally, the weak attribute
- // allows the function to exist as a global although its definition is
- // included in every compilation unit.
- #if defined(_WIN32) || defined(_WIN64)
- // Windows assertion hook: traps immediately; the message, file and line arguments are not used.
- extern "C" __device__ __attribute__((noinline, weak))
- void _wassert(const wchar_t *_msg, const wchar_t *_file, unsigned _line) {
-   // FIXME: Need `wchar_t` support to generate assertion message.
-   __builtin_trap();
- }
- #else /* defined(_WIN32) || defined(_WIN64) */
- extern "C" __device__ __attribute__((noinline)) __attribute__((weak))
- void __assert_fail(const char *assertion, // text of the failed expression (last %s in fmt)
- const char *file, // source file name (first %s in fmt)
- unsigned int line, // source line number (%u in fmt)
- const char *function) // enclosing function name (second %s in fmt)
- {
- const char fmt[] = "%s:%u: %s: Device-side assertion `%s' failed.\n";
- // strlen is not available as a built-in yet, so we create our own
- // loop in a macro. With a string literal argument, the compiler
- // usually manages to replace the loop with a constant.
- //
- // The macro does not check for null pointers, since all the string
- // arguments are defined to be constant literals when called from
- // the assert() macro.
- //
- // NOTE: The loop below includes the null terminator in the length
- // as required by append_string_n().
- #define __hip_get_string_length(LEN, STR) \
- do { \
- const char *tmp = STR; \
- while (*tmp++); \
- LEN = tmp - STR; \
- } while (0)
- auto msg = __ockl_fprintf_stderr_begin(); // start a message on the stderr stream
- int len = 0;
- __hip_get_string_length(len, fmt);
- msg = __ockl_fprintf_append_string_n(msg, fmt, len, 0); // format string goes first
- __hip_get_string_length(len, file);
- msg = __ockl_fprintf_append_string_n(msg, file, len, 0); // arguments follow in fmt order
- msg = __ockl_fprintf_append_args(msg, 1, line, 0, 0, 0, 0, 0, 0, 0); // one numeric argument: line
- __hip_get_string_length(len, function);
- msg = __ockl_fprintf_append_string_n(msg, function, len, 0);
- __hip_get_string_length(len, assertion);
- __ockl_fprintf_append_string_n(msg, assertion, len, /* is_last = */ 1);
- #undef __hip_get_string_length
- __builtin_trap(); // trap after the message has been assembled
- }
- // Argument-less assertion hook: traps immediately.
- extern "C" __device__ __attribute__((noinline, weak))
- void __assertfail() {
-   // ignore all the args for now.
-   __builtin_trap();
- }
- #endif /* defined(_WIN32) || defined(_WIN64) */
- // Executes s_barrier. When `flags` is non-zero the barrier is bracketed by a release fence
- // before it and an acquire fence after it, both at "workgroup" scope.
- __device__ inline static void __work_group_barrier(__cl_mem_fence_flags flags) {
-   if (!flags) {
-     __builtin_amdgcn_s_barrier();
-     return;
-   }
-   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
-   __builtin_amdgcn_s_barrier();
-   __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
- }
- // Casts `n` to __cl_mem_fence_flags and forwards it to __work_group_barrier.
- __device__
- static inline void __barrier(int n) {
-   const __cl_mem_fence_flags flags = (__cl_mem_fence_flags)n;
-   __work_group_barrier(flags);
- }
- // Calls __barrier with __CLK_LOCAL_MEM_FENCE. Marked convergent.
- __device__
- inline __attribute__((convergent))
- void __syncthreads() {
-   __barrier(__CLK_LOCAL_MEM_FENCE);
- }
- // Normalizes `predicate` to 0/1 and returns the result of __ockl_wgred_add_i32 on it.
- // NOTE(review): presumably the number of work-group threads with a non-zero predicate - confirm in OCKL docs.
- __device__
- inline __attribute__((convergent))
- int __syncthreads_count(int predicate) {
-   const bool is_set = (predicate != 0);
-   return __ockl_wgred_add_i32(is_set);
- }
- // Normalizes `predicate` to 0/1 and returns the result of __ockl_wgred_and_i32 on it.
- // NOTE(review): presumably non-zero only when every work-group thread's predicate is non-zero - confirm.
- __device__
- inline __attribute__((convergent))
- int __syncthreads_and(int predicate) {
-   const bool is_set = (predicate != 0);
-   return __ockl_wgred_and_i32(is_set);
- }
- // Normalizes `predicate` to 0/1 and returns the result of __ockl_wgred_or_i32 on it.
- // NOTE(review): presumably non-zero when any work-group thread's predicate is non-zero - confirm.
- __device__
- inline __attribute__((convergent))
- int __syncthreads_or(int predicate) {
-   const bool is_set = (predicate != 0);
-   return __ockl_wgred_or_i32(is_set);
- }
- // hip.amdgcn.bc - device routine
- /*
- HW_ID Register bit structure for RDNA2 & RDNA3
- WAVE_ID 4:0 Wave id within the SIMD.
- SIMD_ID 9:8 SIMD_ID within the WGP: [0] = row, [1] = column.
- WGP_ID 13:10 Physical WGP ID.
- SA_ID 16 Shader Array ID
- SE_ID 20:18 Shader Engine the wave is assigned to for gfx11
- SE_ID 19:18 Shader Engine the wave is assigned to for gfx10
- DP_RATE 31:29 Number of double-precision float units per SIMD
- HW_ID Register bit structure for GCN and CDNA
- WAVE_ID 3:0 Wave buffer slot number. 0-9.
- SIMD_ID 5:4 SIMD which the wave is assigned to within the CU.
- PIPE_ID 7:6 Pipeline from which the wave was dispatched.
- CU_ID 11:8 Compute Unit the wave is assigned to.
- SH_ID 12 Shader Array (within an SE) the wave is assigned to.
- SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a, gfx940-942
- 14:13 Shader Engine the wave is assigned to for Vega.
- TG_ID 19:16 Thread-group ID
- VM_ID 23:20 Virtual Memory ID
- QUEUE_ID 26:24 Queue from which this wave was dispatched.
- STATE_ID 29:27 State ID (graphics only, not compute).
- ME_ID 31:30 Micro-engine ID.
- XCC_ID Register bit structure for gfx940
- XCC_ID 3:0 XCC the wave is assigned to.
- */
- #if (defined (__GFX10__) || defined (__GFX11__))
- #define HW_ID 23 // register id placed in the low bits of GETREG_IMMED on GFX10/GFX11
- #else
- #define HW_ID 4 // register id placed in the low bits of GETREG_IMMED on other targets
- #endif
- #if (defined(__GFX10__) || defined(__GFX11__))
- #define HW_ID_WGP_ID_SIZE 4 // WGP_ID field: 4 bits starting at bit 10
- #define HW_ID_WGP_ID_OFFSET 10
- #else
- #define HW_ID_CU_ID_SIZE 4 // CU_ID field: 4 bits starting at bit 8
- #define HW_ID_CU_ID_OFFSET 8
- #endif
- #if (defined(__gfx908__) || defined(__gfx90a__) || \
- defined(__GFX11__))
- #define HW_ID_SE_ID_SIZE 3 // 3-bit SE_ID on gfx908, gfx90a and GFX11
- #else //4 SEs/XCC for gfx940-942
- #define HW_ID_SE_ID_SIZE 2 // 2-bit SE_ID on every other target
- #endif
- #if (defined(__GFX10__) || defined(__GFX11__))
- #define HW_ID_SE_ID_OFFSET 18 // SE_ID starts at bit 18 on GFX10/GFX11
- #define HW_ID_SA_ID_OFFSET 16 // SA_ID field: 1 bit at bit 16
- #define HW_ID_SA_ID_SIZE 1
- #else
- #define HW_ID_SE_ID_OFFSET 13 // SE_ID starts at bit 13 on other targets
- #endif
- #if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
- #define XCC_ID 20 // register id of the XCC_ID register (gfx940-942 only)
- #define XCC_ID_XCC_ID_SIZE 4 // XCC_ID field: 4 bits starting at bit 0
- #define XCC_ID_XCC_ID_OFFSET 0
- #endif
- #if (!defined(__HIP_NO_IMAGE_SUPPORT) && \
- (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)))
- #define __HIP_NO_IMAGE_SUPPORT 1 // defined automatically for gfx940-942 unless already set
- #endif
- /*
- Encoding of parameter bitmask
- HW_ID 5:0 HW_ID
- OFFSET 10:6 Range: 0..31
- SIZE 15:11 Range: 1..32
- */
- #define GETREG_IMMED(SZ,OFF,REG) (((SZ) << 11) | ((OFF) << 6) | (REG))
- /*
- __smid returns an identifier packed from fields of the wave's HW_ID register (and, on
- gfx940-942, the XCC_ID register), each read with __builtin_amdgcn_s_getreg.
- Packing, most-significant field first:
-   GFX10/GFX11 : SE_ID | SA_ID (1 bit) | WGP_ID (4 bits)
-   gfx940-942  : XCC_ID | SE_ID (HW_ID_SE_ID_SIZE bits) | CU_ID (4 bits)
-   otherwise   : SE_ID | CU_ID (4 bits), i.e. CU_ID in bits 3:0 and SE_ID starting at bit 4
- Note: the results vary over time.
- SZ minus 1 since SIZE is 1-based.
- */
- __device__
- inline
- unsigned __smid(void)
- {
- unsigned se_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_SE_ID_SIZE-1, HW_ID_SE_ID_OFFSET, HW_ID));
- #if (defined(__GFX10__) || defined(__GFX11__))
- unsigned wgp_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_WGP_ID_SIZE - 1, HW_ID_WGP_ID_OFFSET, HW_ID));
- unsigned sa_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_SA_ID_SIZE - 1, HW_ID_SA_ID_OFFSET, HW_ID));
- #else
- #if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
- unsigned xcc_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(XCC_ID_XCC_ID_SIZE - 1, XCC_ID_XCC_ID_OFFSET, XCC_ID));
- #endif
- unsigned cu_id = __builtin_amdgcn_s_getreg(
- GETREG_IMMED(HW_ID_CU_ID_SIZE - 1, HW_ID_CU_ID_OFFSET, HW_ID));
- #endif
- #if (defined(__GFX10__) || defined(__GFX11__))
- unsigned temp = se_id; // build SE_ID | SA_ID | WGP_ID by shifting in each narrower field
- temp = (temp << HW_ID_SA_ID_SIZE) | sa_id;
- temp = (temp << HW_ID_WGP_ID_SIZE) | wgp_id;
- return temp;
- //TODO : CU Mode impl
- #elif (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
- unsigned temp = xcc_id; // build XCC_ID | SE_ID | CU_ID by shifting in each narrower field
- temp = (temp << HW_ID_SE_ID_SIZE) | se_id;
- temp = (temp << HW_ID_CU_ID_SIZE) | cu_id;
- return temp;
- #else
- return (se_id << HW_ID_CU_ID_SIZE) + cu_id; // SE_ID above the 4-bit CU_ID
- #endif
- }
- /**
- * Map HIP_DYNAMIC_SHARED to "extern __shared__" for compatibility with old HIP applications
- * To be removed in a future release.
- */
- #define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];
- #define HIP_DYNAMIC_SHARED_ATTRIBUTE
- #endif //defined(__clang__) && defined(__HIP__)
- // Byte-wise copy of `size` bytes from `src` to `dst`, manually unrolled four bytes per
- // iteration. Returns `dst`.
- static inline __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size) {
-   auto out = static_cast<unsigned char*>(dst);
-   auto in = static_cast<const unsigned char*>(src);
-   for (; size >= 4u; size -= 4u, in += 4u, out += 4u) {
-     out[0] = in[0];
-     out[1] = in[1];
-     out[2] = in[2];
-     out[3] = in[3];
-   }
-   // 0-3 bytes remain; copy them highest index first.
-   if (size == 3) out[2] = in[2];
-   if (size >= 2) out[1] = in[1];
-   if (size >= 1) out[0] = in[0];
-   return dst;
- }
- // Writes the byte `val` to the first `size` bytes at `dst`, manually unrolled four bytes per
- // iteration. Returns `dst`.
- static inline __device__ void* __hip_hc_memset(void* dst, unsigned char val, size_t size) {
-   auto out = static_cast<unsigned char*>(dst);
-   for (; size >= 4u; size -= 4u, out += 4u) {
-     out[0] = val;
-     out[1] = val;
-     out[2] = val;
-     out[3] = val;
-   }
-   // 0-3 bytes remain; fill them highest index first.
-   if (size == 3) out[2] = val;
-   if (size >= 2) out[1] = val;
-   if (size >= 1) out[0] = val;
-   return dst;
- }
- #ifndef __OPENMP_AMDGCN__
- // Device memcpy: forwards to __hip_hc_memcpy and returns its result.
- static inline __device__ void* memcpy(void* dst, const void* src, size_t size) {
-   void* copied = __hip_hc_memcpy(dst, src, size);
-   return copied;
- }
- // Device memset: truncates `val` to unsigned char and forwards to __hip_hc_memset.
- static inline __device__ void* memset(void* ptr, int val, size_t size) {
-   const unsigned char fill = static_cast<unsigned char>(val);
-   return __hip_hc_memset(ptr, fill, size);
- }
- #endif // !__OPENMP_AMDGCN__
- #endif
- /*
- Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H
- // Passes the 32 bits of `src` through __builtin_amdgcn_ds_bpermute with `index` and returns
- // the resulting bits as unsigned.
- __device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = src;
-   bits.i = __builtin_amdgcn_ds_bpermute(index, bits.i);
-   return bits.u;
- }
- // Passes the 32 bits of the float `src` through __builtin_amdgcn_ds_bpermute with `index`
- // and returns the resulting bits as a float.
- __device__ static inline float __hip_ds_bpermutef(int index, float src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = src;
-   bits.i = __builtin_amdgcn_ds_bpermute(index, bits.i);
-   return bits.f;
- }
- // Passes the 32 bits of `src` through __builtin_amdgcn_ds_permute with `index` and returns
- // the resulting bits as unsigned.
- __device__ static inline unsigned __hip_ds_permute(int index, unsigned src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = src;
-   bits.i = __builtin_amdgcn_ds_permute(index, bits.i);
-   return bits.u;
- }
- // Passes the 32 bits of the float `src` through __builtin_amdgcn_ds_permute with `index`
- // and returns the resulting bits as a float.
- __device__ static inline float __hip_ds_permutef(int index, float src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = src;
-   bits.i = __builtin_amdgcn_ds_permute(index, bits.i);
-   return bits.f;
- }
- #define __hip_ds_swizzle(src, pattern) __hip_ds_swizzle_N<(pattern)>((src))
- #define __hip_ds_swizzlef(src, pattern) __hip_ds_swizzlef_N<(pattern)>((src))
- // Applies __builtin_amdgcn_ds_swizzle with the compile-time `pattern` to the 32 bits of `src`
- // and returns the resulting bits as unsigned.
- template <int pattern>
- __device__ static inline unsigned __hip_ds_swizzle_N(unsigned int src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = src;
-   bits.i = __builtin_amdgcn_ds_swizzle(bits.i, pattern);
-   return bits.u;
- }
- // Applies __builtin_amdgcn_ds_swizzle with the compile-time `pattern` to the 32 bits of the
- // float `src` and returns the resulting bits as a float.
- template <int pattern>
- __device__ static inline float __hip_ds_swizzlef_N(float src) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = src;
-   bits.i = __builtin_amdgcn_ds_swizzle(bits.i, pattern);
-   return bits.f;
- }
- #define __hip_move_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl) \
- __hip_move_dpp_N<(dpp_ctrl), (row_mask), (bank_mask), (bound_ctrl)>((src))
- // Forwards `src` and the compile-time DPP controls to __builtin_amdgcn_mov_dpp.
- // The controls are passed straight from the template parameters so they remain constants.
- template <int dpp_ctrl, int row_mask, int bank_mask, bool bound_ctrl>
- __device__ static inline int __hip_move_dpp_N(int src) {
-   const int moved =
-       __builtin_amdgcn_mov_dpp(src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
-   return moved;
- }
- static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
- // Reads `var` from the lane at position (src_lane & (width-1)) inside the caller's
- // width-aligned segment. The lane index is multiplied by 4 before being handed to ds_bpermute.
- // The masking arithmetic assumes `width` is a power of two.
- __device__
- inline
- int __shfl(int var, int src_lane, int width = warpSize) {
-   const int self = __lane_id();
-   const int segment_mask = width - 1;
-   const int segment_base = self & ~segment_mask;
-   const int source = (src_lane & segment_mask) + segment_base;
-   return __builtin_amdgcn_ds_bpermute(source << 2, var);
- }
- // unsigned overload: shuffles the raw 32 bits through the int overload of __shfl.
- __device__
- inline
- unsigned int __shfl(unsigned int var, int src_lane, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = var;
-   bits.i = __shfl(bits.i, src_lane, width);
-   return bits.u;
- }
- // float overload: shuffles the raw 32 bits through the int overload of __shfl.
- __device__
- inline
- float __shfl(float var, int src_lane, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = var;
-   bits.i = __shfl(bits.i, src_lane, width);
-   return bits.f;
- }
- // double overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- double __shfl(double var, int src_lane, int width = warpSize) {
-   static_assert(sizeof(double) == sizeof(uint64_t), "");
-   static_assert(sizeof(double) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl(words[0], src_lane, width);
-   const int hi = __shfl(words[1], src_lane, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   double shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // long overload. Without _MSC_VER long is asserted to be 64-bit: both 32-bit words go through
- // the int overload and are reassembled. With _MSC_VER long is asserted to be 32-bit and is
- // shuffled as a single int.
- __device__
- inline
- long __shfl(long var, int src_lane, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl(words[0], src_lane, width);
-   const int hi = __shfl(words[1], src_lane, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(long) == sizeof(int), "");
-   const int narrowed = static_cast<int>(var);
-   return static_cast<long>(__shfl(narrowed, src_lane, width));
- #endif
- }
- // unsigned long overload. Without _MSC_VER the value is asserted to be 64-bit: both 32-bit
- // words go through the unsigned int overload and are reassembled. With _MSC_VER it is asserted
- // to be 32-bit and is shuffled as a single unsigned int.
- __device__
- inline
- unsigned long __shfl(unsigned long var, int src_lane, int width = warpSize) {
- #ifndef _MSC_VER
-   static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl(words[0], src_lane, width);
-   const unsigned int hi = __shfl(words[1], src_lane, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-   const unsigned int narrowed = static_cast<unsigned int>(var);
-   return static_cast<unsigned long>(__shfl(narrowed, src_lane, width));
- #endif
- }
- // long long overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- long long __shfl(long long var, int src_lane, int width = warpSize)
- {
-   static_assert(sizeof(long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl(words[0], src_lane, width);
-   const int hi = __shfl(words[1], src_lane, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // unsigned long long overload: shuffles the two 32-bit words of `var` through the unsigned int
- // overload, then reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- unsigned long long __shfl(unsigned long long var, int src_lane, int width = warpSize) {
-   static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl(words[0], src_lane, width);
-   const unsigned int hi = __shfl(words[1], src_lane, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // Reads `var` from the lane `lane_delta` positions below the caller. If that lane would lie
- // below the start of the caller's width-aligned segment, the caller reads its own value.
- // The masking arithmetic assumes `width` is a power of two.
- //
- // The source lane is computed in 64-bit signed arithmetic: `self - lane_delta` mixes int and
- // unsigned, so in 32 bits a delta close to 2^32 would wrap around to a small positive lane
- // index and pass the range check.
- __device__
- inline
- int __shfl_up(int var, unsigned int lane_delta, int width = warpSize) {
-   const int self = __lane_id();
-   const int segment_base = self & ~(width - 1);
-   const long long source = static_cast<long long>(self) - static_cast<long long>(lane_delta);
-   const int index = (source < segment_base) ? self : static_cast<int>(source);
-   // The lane index is scaled by 4 for ds_bpermute.
-   return __builtin_amdgcn_ds_bpermute(index << 2, var);
- }
- // unsigned overload: shuffles the raw 32 bits through the int overload of __shfl_up.
- __device__
- inline
- unsigned int __shfl_up(unsigned int var, unsigned int lane_delta, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = var;
-   bits.i = __shfl_up(bits.i, lane_delta, width);
-   return bits.u;
- }
- // float overload: shuffles the raw 32 bits through the int overload of __shfl_up.
- __device__
- inline
- float __shfl_up(float var, unsigned int lane_delta, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = var;
-   bits.i = __shfl_up(bits.i, lane_delta, width);
-   return bits.f;
- }
- // double overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- double __shfl_up(double var, unsigned int lane_delta, int width = warpSize) {
-   static_assert(sizeof(double) == sizeof(uint64_t), "");
-   static_assert(sizeof(double) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_up(words[0], lane_delta, width);
-   const int hi = __shfl_up(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   double shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // long overload. Without _MSC_VER long is asserted to be 64-bit: both 32-bit words go through
- // the int overload and are reassembled. With _MSC_VER long is asserted to be 32-bit and is
- // shuffled as a single int.
- __device__
- inline
- long __shfl_up(long var, unsigned int lane_delta, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_up(words[0], lane_delta, width);
-   const int hi = __shfl_up(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(long) == sizeof(int), "");
-   const int narrowed = static_cast<int>(var);
-   return static_cast<long>(__shfl_up(narrowed, lane_delta, width));
- #endif
- }
- // unsigned long overload. Without _MSC_VER the value is asserted to be 64-bit: both 32-bit
- // words go through the unsigned int overload and are reassembled. With _MSC_VER it is asserted
- // to be 32-bit and is shuffled as a single unsigned int.
- __device__
- inline
- unsigned long __shfl_up(unsigned long var, unsigned int lane_delta, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl_up(words[0], lane_delta, width);
-   const unsigned int hi = __shfl_up(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-   const unsigned int narrowed = static_cast<unsigned int>(var);
-   return static_cast<unsigned long>(__shfl_up(narrowed, lane_delta, width));
- #endif
- }
- // long long overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- long long __shfl_up(long long var, unsigned int lane_delta, int width = warpSize)
- {
-   static_assert(sizeof(long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_up(words[0], lane_delta, width);
-   const int hi = __shfl_up(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // unsigned long long overload: shuffles the two 32-bit words of `var` through the unsigned int
- // overload, then reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- unsigned long long __shfl_up(unsigned long long var, unsigned int lane_delta, int width = warpSize)
- {
-   static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl_up(words[0], lane_delta, width);
-   const unsigned int hi = __shfl_up(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // Reads `var` from the lane `lane_delta` positions above the caller. If that lane would lie
- // past the end of the caller's width-aligned segment, the caller reads its own value.
- // The masking arithmetic assumes `width` is a power of two.
- //
- // The range check is done in 64-bit signed arithmetic: casting the 32-bit sum to int turns a
- // delta of 2^31 or more negative, which would pass the `>= width` test and select a wrapped
- // lane index.
- __device__
- inline
- int __shfl_down(int var, unsigned int lane_delta, int width = warpSize) {
-   const int self = __lane_id();
-   const long long offset =
-       static_cast<long long>(self & (width - 1)) + static_cast<long long>(lane_delta);
-   const int index = (offset >= width) ? self : static_cast<int>(self + lane_delta);
-   // The lane index is scaled by 4 for ds_bpermute.
-   return __builtin_amdgcn_ds_bpermute(index << 2, var);
- }
- // unsigned overload: shuffles the raw 32 bits through the int overload of __shfl_down.
- __device__
- inline
- unsigned int __shfl_down(unsigned int var, unsigned int lane_delta, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = var;
-   bits.i = __shfl_down(bits.i, lane_delta, width);
-   return bits.u;
- }
- // float overload: shuffles the raw 32 bits through the int overload of __shfl_down.
- __device__
- inline
- float __shfl_down(float var, unsigned int lane_delta, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = var;
-   bits.i = __shfl_down(bits.i, lane_delta, width);
-   return bits.f;
- }
- // double overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- double __shfl_down(double var, unsigned int lane_delta, int width = warpSize) {
-   static_assert(sizeof(double) == sizeof(uint64_t), "");
-   static_assert(sizeof(double) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_down(words[0], lane_delta, width);
-   const int hi = __shfl_down(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   double shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // long overload. Without _MSC_VER long is asserted to be 64-bit: both 32-bit words go through
- // the int overload and are reassembled. With _MSC_VER long is asserted to be 32-bit and is
- // shuffled as a single int.
- __device__
- inline
- long __shfl_down(long var, unsigned int lane_delta, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_down(words[0], lane_delta, width);
-   const int hi = __shfl_down(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(long) == sizeof(int), "");
-   const int narrowed = static_cast<int>(var);
-   return static_cast<long>(__shfl_down(narrowed, lane_delta, width));
- #endif
- }
- // unsigned long overload. Without _MSC_VER the value is asserted to be 64-bit: both 32-bit
- // words go through the unsigned int overload and are reassembled. With _MSC_VER it is asserted
- // to be 32-bit and is shuffled as a single unsigned int.
- __device__
- inline
- unsigned long __shfl_down(unsigned long var, unsigned int lane_delta, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl_down(words[0], lane_delta, width);
-   const unsigned int hi = __shfl_down(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-   const unsigned int narrowed = static_cast<unsigned int>(var);
-   return static_cast<unsigned long>(__shfl_down(narrowed, lane_delta, width));
- #endif
- }
- // long long overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- long long __shfl_down(long long var, unsigned int lane_delta, int width = warpSize)
- {
-   static_assert(sizeof(long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_down(words[0], lane_delta, width);
-   const int hi = __shfl_down(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // unsigned long long overload: shuffles the two 32-bit words of `var` through the unsigned int
- // overload, then reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- unsigned long long __shfl_down(unsigned long long var, unsigned int lane_delta, int width = warpSize)
- {
-   static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl_down(words[0], lane_delta, width);
-   const unsigned int hi = __shfl_down(words[1], lane_delta, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // Reads `var` from lane (self ^ lane_mask). If that lane index is at or beyond the end of the
- // caller's width-aligned segment, the caller reads its own value instead.
- // The lane index is multiplied by 4 before being handed to ds_bpermute.
- __device__
- inline
- int __shfl_xor(int var, int lane_mask, int width = warpSize) {
-   const int self = __lane_id();
-   const int segment_end = (self + width) & ~(width - 1);
-   int source = self ^ lane_mask;
-   if (source >= segment_end) {
-     source = self;
-   }
-   return __builtin_amdgcn_ds_bpermute(source << 2, var);
- }
- // unsigned overload: shuffles the raw 32 bits through the int overload of __shfl_xor.
- __device__
- inline
- unsigned int __shfl_xor(unsigned int var, int lane_mask, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.u = var;
-   bits.i = __shfl_xor(bits.i, lane_mask, width);
-   return bits.u;
- }
- // float overload: shuffles the raw 32 bits through the int overload of __shfl_xor.
- __device__
- inline
- float __shfl_xor(float var, int lane_mask, int width = warpSize) {
-   union { int i; unsigned u; float f; } bits;
-   bits.f = var;
-   bits.i = __shfl_xor(bits.i, lane_mask, width);
-   return bits.f;
- }
- // double overload: shuffles the two 32-bit words of `var` through the int overload, then
- // reassembles them with the second word as the upper 32 bits.
- __device__
- inline
- double __shfl_xor(double var, int lane_mask, int width = warpSize) {
-   static_assert(sizeof(double) == sizeof(uint64_t), "");
-   static_assert(sizeof(double) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_xor(words[0], lane_mask, width);
-   const int hi = __shfl_xor(words[1], lane_mask, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   double shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- }
- // long overload. Without _MSC_VER long is asserted to be 64-bit: both 32-bit words go through
- // the int overload and are reassembled. With _MSC_VER long is asserted to be 32-bit and is
- // shuffled as a single int.
- __device__
- inline
- long __shfl_xor(long var, int lane_mask, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(long) == sizeof(uint64_t), "");
-   static_assert(sizeof(long) == 2 * sizeof(int), "");
-   int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const int lo = __shfl_xor(words[0], lane_mask, width);
-   const int hi = __shfl_xor(words[1], lane_mask, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(long) == sizeof(int), "");
-   const int narrowed = static_cast<int>(var);
-   return static_cast<long>(__shfl_xor(narrowed, lane_mask, width));
- #endif
- }
- // unsigned long overload. Without _MSC_VER the value is asserted to be 64-bit: both 32-bit
- // words go through the unsigned int overload and are reassembled. With _MSC_VER it is asserted
- // to be 32-bit and is shuffled as a single unsigned int.
- __device__
- inline
- unsigned long __shfl_xor(unsigned long var, int lane_mask, int width = warpSize)
- {
- #ifndef _MSC_VER
-   static_assert(sizeof(unsigned long) == sizeof(uint64_t), "");
-   static_assert(sizeof(unsigned long) == 2 * sizeof(unsigned int), "");
-   unsigned int words[2];
-   __builtin_memcpy(words, &var, sizeof(words));
-   const unsigned int lo = __shfl_xor(words[0], lane_mask, width);
-   const unsigned int hi = __shfl_xor(words[1], lane_mask, width);
-   const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
-   unsigned long shuffled;
-   __builtin_memcpy(&shuffled, &packed, sizeof(packed));
-   return shuffled;
- #else
-   static_assert(sizeof(unsigned long) == sizeof(unsigned int), "");
-   const unsigned int narrowed = static_cast<unsigned int>(var);
-   return static_cast<unsigned long>(__shfl_xor(narrowed, lane_mask, width));
- #endif
- }
- __device__
- inline
- long long __shfl_xor(long long var, int lane_mask, int width = warpSize)
- {
- // A long long is exchanged as two independent 32-bit shuffles and then reassembled.
- static_assert(sizeof(long long) == 2 * sizeof(int), "");
- static_assert(sizeof(long long) == sizeof(uint64_t), "");
- int halves[2];
- __builtin_memcpy(halves, &var, sizeof(halves));
- const int lo = __shfl_xor(halves[0], lane_mask, width);
- const int hi = __shfl_xor(halves[1], lane_mask, width);
- // halves[1] lands in bits 63..32; halves[0] is zero-extended into bits 31..0.
- const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
- long long result;
- __builtin_memcpy(&result, &packed, sizeof(packed));
- return result;
- }
- __device__
- inline
- unsigned long long __shfl_xor(unsigned long long var, int lane_mask, int width = warpSize)
- {
- // An unsigned long long is exchanged as two independent 32-bit shuffles and then reassembled.
- static_assert(sizeof(unsigned long long) == 2 * sizeof(unsigned int), "");
- static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "");
- unsigned int halves[2];
- __builtin_memcpy(halves, &var, sizeof(halves));
- const unsigned int lo = __shfl_xor(halves[0], lane_mask, width);
- const unsigned int hi = __shfl_xor(halves[1], lane_mask, width);
- // halves[1] lands in bits 63..32; halves[0] fills bits 31..0.
- const uint64_t packed = (static_cast<uint64_t>(hi) << 32ull) | static_cast<uint32_t>(lo);
- unsigned long long result;
- __builtin_memcpy(&result, &packed, sizeof(packed));
- return result;
- }
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- /**
- * @file amd_detail/hip_cooperative_groups_helper.h
- *
- * @brief Device side implementation of cooperative group feature.
- *
- * Defines helper constructs and APIs which aid the types and device API
- * wrappers defined within `amd_detail/hip_cooperative_groups.h`.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
- #if __cplusplus
- #if !defined(__HIPCC_RTC__)
- #include <hip/amd_detail/amd_hip_runtime.h> // threadId, blockId
- #include <hip/amd_detail/amd_device_functions.h>
- #endif
- // Fallback alignment specifier, defined only when no `__align__` macro exists yet.
- #if !defined(__align__)
- #define __align__(x) __attribute__((aligned(x)))
- #endif
- // Qualifier for cooperative-groups functions: device-side and force-inlined.
- #if !defined(__CG_QUALIFIER__)
- #define __CG_QUALIFIER__ __device__ __forceinline__
- #endif
- // Same as __CG_QUALIFIER__, with `static` added.
- #if !defined(__CG_STATIC_QUALIFIER__)
- #define __CG_STATIC_QUALIFIER__ __device__ static __forceinline__
- #endif
- // Declaration prefix for compile-time constants (expands to `static constexpr`).
- #if !defined(_CG_STATIC_CONST_DECL_)
- #define _CG_STATIC_CONST_DECL_ static constexpr
- #endif
- // Lane bitmask type: `unsigned int` when the wavefront size is 32, otherwise
- // `unsigned long long int`.
- #if __AMDGCN_WAVEFRONT_SIZE == 32
- using lane_mask = unsigned int;
- #else
- using lane_mask = unsigned long long int;
- #endif
- namespace cooperative_groups {
- /* Global scope */
- // True when `size` is a non-zero power of two. Zero is rejected explicitly because
- // `0 & (0 - 1)` also evaluates to 0 and would otherwise be accepted as a power of two.
- template <unsigned int size>
- using is_power_of_2 = std::integral_constant<bool, (size != 0) && ((size & (size - 1)) == 0)>;
- // True when `size` does not exceed the number of lanes in one wavefront.
- template <unsigned int size>
- using is_valid_wavefront = std::integral_constant<bool, !(__AMDGCN_WAVEFRONT_SIZE < size)>;
- // True when `size` passes both the wavefront bound and the power-of-two check.
- template <unsigned int size>
- using is_valid_tile_size =
- std::integral_constant<bool, is_valid_wavefront<size>::value && is_power_of_2<size>::value>;
- // True for the built-in integer and floating-point types; this is the set of value types
- // the shuffle wrappers accept.
- template <typename T>
- using is_valid_type =
- std::integral_constant<bool, std::is_floating_point<T>::value || std::is_integral<T>::value>;
- namespace internal {
- /**
- * @brief Enums representing different cooperative group types
- * @note This enum is only applicable on Linux.
- *
- */
- // Tag stored in thread_group::_type and used to dispatch the base-class member functions.
- typedef enum {
- cg_invalid = 0,
- cg_multi_grid = 1,
- cg_grid = 2,
- cg_workgroup = 3,
- cg_tiled_group = 4,
- cg_coalesced_group = 5
- } group_type;
- /**
- * @ingroup CooperativeG
- * @{
- * This section describes the cooperative groups functions of HIP runtime API.
- *
- * The cooperative groups provides flexible thread parallel programming algorithms, threads
- * cooperate and share data to perform collective computations.
- *
- * @note The cooperative groups feature is implemented on Linux and is under development
- * on Windows.
- *
- */
- /**
- *
- * @brief Functionalities related to multi-grid cooperative group type
- * @note The following cooperative groups functions are only applicable on Linux.
- *
- */
- namespace multi_grid {
- // Thin wrappers over the __ockl_multi_grid_* entry points. Each one converts the
- // value returned by the underlying call to the type used by the group classes.
- __CG_STATIC_QUALIFIER__ uint32_t num_grids() {
- const auto raw = __ockl_multi_grid_num_grids();
- return static_cast<uint32_t>(raw);
- }
- __CG_STATIC_QUALIFIER__ uint32_t grid_rank() {
- const auto raw = __ockl_multi_grid_grid_rank();
- return static_cast<uint32_t>(raw);
- }
- __CG_STATIC_QUALIFIER__ uint32_t size() {
- const auto raw = __ockl_multi_grid_size();
- return static_cast<uint32_t>(raw);
- }
- __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- const auto raw = __ockl_multi_grid_thread_rank();
- return static_cast<uint32_t>(raw);
- }
- __CG_STATIC_QUALIFIER__ bool is_valid() {
- const auto raw = __ockl_multi_grid_is_valid();
- return static_cast<bool>(raw);
- }
- __CG_STATIC_QUALIFIER__ void sync() {
- __ockl_multi_grid_sync();
- }
- } // namespace multi_grid
- /**
- * @brief Functionalities related to grid cooperative group type
- * @note The following cooperative groups functions are only applicable on Linux.
- */
- namespace grid {
- // Total number of threads in the grid: the product of the per-axis thread counts
- // (blockDim * gridDim), converted to uint32_t.
- __CG_STATIC_QUALIFIER__ uint32_t size() {
- return static_cast<uint32_t>((blockDim.z * gridDim.z) * (blockDim.y * gridDim.y) *
- (blockDim.x * gridDim.x));
- }
- // Linear rank of the calling thread within the grid: all threads of the preceding
- // workgroups come first, followed by the thread's rank inside its own workgroup.
- __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- // Linearised (x fastest, then y, then z) index of the calling thread's workgroup
- const uint32_t linear_block = static_cast<uint32_t>((blockIdx.z * gridDim.y * gridDim.x) +
- (blockIdx.y * gridDim.x) + (blockIdx.x));
- // Number of threads contained in all workgroups that precede this one
- const uint32_t threads_before_block =
- static_cast<uint32_t>(linear_block * (blockDim.x * blockDim.y * blockDim.z));
- // Linearised index of the calling thread inside its workgroup
- const uint32_t rank_in_block = static_cast<uint32_t>((threadIdx.z * blockDim.y * blockDim.x) +
- (threadIdx.y * blockDim.x) + (threadIdx.x));
- return threads_before_block + rank_in_block;
- }
- __CG_STATIC_QUALIFIER__ bool is_valid() {
- const auto raw = __ockl_grid_is_valid();
- return static_cast<bool>(raw);
- }
- __CG_STATIC_QUALIFIER__ void sync() {
- __ockl_grid_sync();
- }
- } // namespace grid
- /**
- * @brief Functionalities related to `workgroup` (thread_block in CUDA terminology)
- * cooperative group type
- * @note The following cooperative groups functions are only applicable on Linux.
- */
- namespace workgroup {
- // blockIdx repackaged as a dim3.
- __CG_STATIC_QUALIFIER__ dim3 group_index() {
- const uint32_t bx = static_cast<uint32_t>(blockIdx.x);
- const uint32_t by = static_cast<uint32_t>(blockIdx.y);
- const uint32_t bz = static_cast<uint32_t>(blockIdx.z);
- return dim3(bx, by, bz);
- }
- // threadIdx repackaged as a dim3.
- __CG_STATIC_QUALIFIER__ dim3 thread_index() {
- const uint32_t tx = static_cast<uint32_t>(threadIdx.x);
- const uint32_t ty = static_cast<uint32_t>(threadIdx.y);
- const uint32_t tz = static_cast<uint32_t>(threadIdx.z);
- return dim3(tx, ty, tz);
- }
- // Number of threads in the workgroup (product of the three block dimensions).
- __CG_STATIC_QUALIFIER__ uint32_t size() {
- return static_cast<uint32_t>(blockDim.x * blockDim.y * blockDim.z);
- }
- // Linearised (x fastest, then y, then z) index of the calling thread in its workgroup.
- __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- return static_cast<uint32_t>((threadIdx.z * blockDim.y * blockDim.x) +
- (threadIdx.y * blockDim.x) + (threadIdx.x));
- }
- // A workgroup is reported as valid unconditionally.
- __CG_STATIC_QUALIFIER__ bool is_valid() { return true; }
- // Workgroup-wide barrier.
- __CG_STATIC_QUALIFIER__ void sync() {
- __syncthreads();
- }
- // blockDim repackaged as a dim3.
- __CG_STATIC_QUALIFIER__ dim3 block_dim() {
- const uint32_t dx = static_cast<uint32_t>(blockDim.x);
- const uint32_t dy = static_cast<uint32_t>(blockDim.y);
- const uint32_t dz = static_cast<uint32_t>(blockDim.z);
- return dim3(dx, dy, dz);
- }
- } // namespace workgroup
- namespace tiled_group {
- // Enforces ordering of memory instructions by issuing an acquire-release fence
- // with "agent" scope.
- __CG_STATIC_QUALIFIER__ void sync() {
- __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
- }
- } // namespace tiled_group
- namespace coalesced_group {
- // Enforces ordering of memory instructions by issuing an acquire-release fence
- // with "agent" scope.
- __CG_STATIC_QUALIFIER__ void sync() {
- __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
- }
- // Masked bit count.
- //
- // Returns `add` plus the count produced by the mbcnt builtins for mask `x`
- // (presumably the number of set bits of `x` belonging to lanes below the calling
- // lane - confirm against the AMDGPU builtin documentation). A 64-bit mask is
- // processed as its low and high 32-bit halves, chaining the partial count.
- __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
- #if __AMDGCN_WAVEFRONT_SIZE == 32
- return __builtin_amdgcn_mbcnt_lo(x, add);
- #else
- const unsigned int low_count = __builtin_amdgcn_mbcnt_lo(static_cast<lane_mask>(x), add);
- return __builtin_amdgcn_mbcnt_hi(static_cast<lane_mask>(x >> 32), low_count);
- #endif
- }
- } // namespace coalesced_group
- } // namespace internal
- } // namespace cooperative_groups
- /**
- * @}
- */
- #endif // __cplusplus
- #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- /**
- * @file amd_detail/hip_cooperative_groups.h
- *
- * @brief Device side implementation of `Cooperative Group` feature.
- *
- * Defines new types and device API wrappers related to `Cooperative Group`
- * feature, which the programmer can directly use in his kernel(s) in order to
- * make use of this feature.
- */
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H
- #if __cplusplus
- #if !defined(__HIPCC_RTC__)
- #include <hip/amd_detail/hip_cooperative_groups_helper.h>
- #endif
- // Terminates execution unconditionally.
- #define __hip_abort() \
- { abort(); }
- // Debug-only assertion: aborts when COND evaluates to false. Expands to nothing when
- // NDEBUG is defined. COND is parenthesised so that the negation applies to the whole
- // expression; without the parentheses `__hip_assert(a || b)` would test `!a || b`.
- // The expansion is a braced block, so call sites may omit the trailing semicolon.
- #if defined(NDEBUG)
- #define __hip_assert(COND)
- #else
- #define __hip_assert(COND) \
- { \
- if (!(COND)) { \
- __hip_abort(); \
- } \
- }
- #endif
- namespace cooperative_groups {
- /** @brief The base type of all cooperative group types
- *
- * \details Holds the key properties of a constructed cooperative group types
- * object, like the group type, its size, etc
- *
- * @note Cooperative groups feature is implemented on Linux, under developement
- * on Windows.
- */
- class thread_group {
- protected:
- uint32_t _type; // group kind (an internal::group_type value)
- uint32_t _size; // number of threads recorded for the group at construction
- uint64_t _mask; // lane mask for coalesced and tiled partitioned group types;
- // the LSB represents lane 0 and the MSB represents lane 63
- // Stores the group kind, thread count and lane mask. A plain thread_group is
- // constructed directly only for a group holding just the calling thread (through
- // the API `this_thread()`); in every other case it is the base sub-object of a
- // derived group type. `coalesced_info` is left for the derived types to fill in.
- __CG_QUALIFIER__ thread_group(internal::group_type type, uint32_t size = static_cast<uint64_t>(0),
- uint64_t mask = static_cast<uint64_t>(0))
- : _type(type), _size(size), _mask(mask) {}
- // Bookkeeping for groups produced by tiled partitioning.
- struct _tiled_info {
- bool is_tiled;
- unsigned int size;
- unsigned int meta_group_rank;
- unsigned int meta_group_size;
- };
- // Bookkeeping for coalesced groups: member lanes, member count and tiling state.
- struct _coalesced_info {
- lane_mask member_mask;
- unsigned int size;
- struct _tiled_info tiled_info;
- } coalesced_info;
- friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent,
- unsigned int tile_size);
- friend class thread_block;
- public:
- // Thread count saved at construction; shared by every derived group type that
- // does not provide its own size().
- __CG_QUALIFIER__ uint32_t size() const {
- return _size;
- }
- // Group kind tag saved at construction.
- __CG_QUALIFIER__ unsigned int cg_type() const {
- return _type;
- }
- // Rank of the calling thread within [0, size())
- __CG_QUALIFIER__ uint32_t thread_rank() const;
- // Whether this cooperative group is valid
- __CG_QUALIFIER__ bool is_valid() const;
- // Synchronizes the threads of the group
- __CG_QUALIFIER__ void sync() const;
- };
- /**
- *-------------------------------------------------------------------------------------------------
- *-------------------------------------------------------------------------------------------------
- * @defgroup CooperativeG Cooperative Groups
- * @ingroup API
- * @{
- * This section describes the cooperative groups functions of HIP runtime API.
- *
- * The cooperative groups provides flexible thread parallel programming algorithms, threads
- * cooperate and share data to perform collective computations.
- *
- * @note Cooperative groups feature is implemented on Linux, under developement
- * on Windows.
- *
- */
- /** \brief The multi-grid cooperative group type
- *
- * \details Represents an inter-device cooperative group type where the
- * participating threads within the group span multiple
- * devices, running the (same) kernel on these devices
- * @note The multi-grid cooperative group type is implemented on Linux and is under
- * development on Windows.
- */
- class multi_grid_group : public thread_group {
- // this_multi_grid() is the only function allowed to construct objects of this class.
- friend __CG_QUALIFIER__ multi_grid_group this_multi_grid();
- protected:
- // Tags the base sub-object as a multi-grid group holding `size` threads.
- explicit __CG_QUALIFIER__ multi_grid_group(uint32_t size)
- : thread_group(internal::cg_multi_grid, size) {}
- public:
- // Number of invocations participating in this multi-grid group, in other
- // words the number of GPUs.
- __CG_QUALIFIER__ uint32_t num_grids() {
- return internal::multi_grid::num_grids();
- }
- // Rank of this invocation: an ID within [0, num_grids()) identifying the GPU
- // this kernel is running on.
- __CG_QUALIFIER__ uint32_t grid_rank() {
- return internal::multi_grid::grid_rank();
- }
- // Rank of the calling thread within the multi-grid group.
- __CG_QUALIFIER__ uint32_t thread_rank() const {
- return internal::multi_grid::thread_rank();
- }
- __CG_QUALIFIER__ bool is_valid() const {
- return internal::multi_grid::is_valid();
- }
- __CG_QUALIFIER__ void sync() const {
- internal::multi_grid::sync();
- }
- };
- /** @brief User exposed API interface to construct multi-grid cooperative
- * group type object - `multi_grid_group`
- *
- * \details User is not allowed to directly construct an object of type
- * `multi_grid_group`. Instead, he should construct it through this
- * API function
- * @note This multi-grid cooperative API type is implemented on Linux, under developement
- * on Windows.
- */
- __CG_QUALIFIER__ multi_grid_group this_multi_grid() {
- // The group is sized with the value reported by internal::multi_grid::size().
- const uint32_t total_threads = internal::multi_grid::size();
- return multi_grid_group(total_threads);
- }
- /** @brief The grid cooperative group type
- *
- * \details Represents an inter-workgroup cooperative group type where the
- * participating threads within the group spans across multiple
- * workgroups running the (same) kernel on the same device
- * @note This is implemented on Linux, under developement
- * on Windows.
- */
- class grid_group : public thread_group {
- // this_grid() is the only function allowed to construct objects of this class.
- friend __CG_QUALIFIER__ grid_group this_grid();
- protected:
- // Tags the base sub-object as a grid group holding `size` threads.
- explicit __CG_QUALIFIER__ grid_group(uint32_t size)
- : thread_group(internal::cg_grid, size) {}
- public:
- // Rank of the calling thread within the grid.
- __CG_QUALIFIER__ uint32_t thread_rank() const {
- return internal::grid::thread_rank();
- }
- __CG_QUALIFIER__ bool is_valid() const {
- return internal::grid::is_valid();
- }
- __CG_QUALIFIER__ void sync() const {
- internal::grid::sync();
- }
- };
- /** @brief User exposed API interface to construct grid cooperative group type
- * object - `grid_group`
- *
- * \details Users are not allowed to construct an object of type
- * `grid_group` directly. Instead, it should be constructed through this
- * API function
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- __CG_QUALIFIER__ grid_group this_grid() {
- // The group is sized with the value reported by internal::grid::size().
- const uint32_t total_threads = internal::grid::size();
- return grid_group(total_threads);
- }
- /** @brief The workgroup (thread-block in CUDA terminology) cooperative group
- * type
- *
- * \details Represents an intra-workgroup cooperative group type where the
- * participating threads within the group are exactly the same threads
- * which are participated in the currently executing `workgroup`
- * @note This is implemented on Linux, under developement
- * on Windows.
- */
- class thread_block : public thread_group {
- // Only these friend functions may construct an object of this class and
- // access its resources.
- friend __CG_QUALIFIER__ thread_block this_thread_block();
- friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent,
- unsigned int tile_size);
- friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent,
- unsigned int tile_size);
- protected:
- // Tags the base sub-object as a workgroup holding `size` threads
- // (used by this_thread_block()).
- explicit __CG_QUALIFIER__ thread_block(uint32_t size)
- : thread_group(internal::cg_workgroup, size) {}
- // Creates the tiled group of `tile_size` threads that contains the calling thread.
- // A tile size that is zero, larger than the wavefront, or not a power of two trips
- // __hip_assert; when that macro is compiled out, construction simply proceeds.
- __CG_QUALIFIER__ thread_group new_tiled_group(unsigned int tile_size) const {
- const bool is_pow2 = ((tile_size & (tile_size - 1)) == 0);
- const bool acceptable = tile_size && !(tile_size > __AMDGCN_WAVEFRONT_SIZE) && is_pow2;
- if (!acceptable) {
- __hip_assert(false && "invalid tile size")
- }
- thread_group tile = thread_group(internal::cg_tiled_group, tile_size);
- auto& info = tile.coalesced_info.tiled_info;
- info.size = tile_size;
- info.is_tiled = true;
- // Index of this tile among the tiles of the block, and the (rounded-up) tile count.
- info.meta_group_rank = thread_rank() / tile_size;
- info.meta_group_size = (size() + tile_size - 1) / tile_size;
- return tile;
- }
- public:
- // 3-dimensional block index within the grid
- __CG_STATIC_QUALIFIER__ dim3 group_index() {
- return internal::workgroup::group_index();
- }
- // 3-dimensional thread index within the block
- __CG_STATIC_QUALIFIER__ dim3 thread_index() {
- return internal::workgroup::thread_index();
- }
- // Linear rank of the calling thread within the block
- __CG_STATIC_QUALIFIER__ uint32_t thread_rank() {
- return internal::workgroup::thread_rank();
- }
- // Number of threads in the block
- __CG_STATIC_QUALIFIER__ uint32_t size() {
- return internal::workgroup::size();
- }
- __CG_STATIC_QUALIFIER__ bool is_valid() {
- return internal::workgroup::is_valid();
- }
- __CG_STATIC_QUALIFIER__ void sync() {
- internal::workgroup::sync();
- }
- // 3-dimensional extent of the block
- __CG_QUALIFIER__ dim3 group_dim() {
- return internal::workgroup::block_dim();
- }
- };
- /** \brief User exposed API interface to construct workgroup cooperative
- * group type object - `thread_block`.
- *
- * \details User is not allowed to directly construct an object of type
- * `thread_block`. Instead, he should construct it through this API
- * function.
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- __CG_QUALIFIER__ thread_block this_thread_block() {
- // The group is sized with the value reported by internal::workgroup::size().
- const uint32_t block_threads = internal::workgroup::size();
- return thread_block(block_threads);
- }
- /** \brief The tiled_group cooperative group type
- *
- * \details Represents one tiled thread group in a wavefront.
- * This group type also supports sub-wave level intrinsics.
- * @note This is implemented on Linux, under developement
- * on Windows.
- */
- class tiled_group : public thread_group {
- private:
- friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent,
- unsigned int tile_size);
- friend __CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent,
- unsigned int tile_size);
- // Re-partitions this tile into tiles of `tile_size` threads. A tile size that is zero,
- // larger than the wavefront, or not a power of two trips __hip_assert. When this tile
- // is already no larger than `tile_size`, a copy of it is returned unchanged.
- __CG_QUALIFIER__ tiled_group new_tiled_group(unsigned int tile_size) const {
- const bool is_pow2 = ((tile_size & (tile_size - 1)) == 0);
- const bool acceptable = tile_size && !(tile_size > __AMDGCN_WAVEFRONT_SIZE) && is_pow2;
- if (!acceptable) {
- __hip_assert(false && "invalid tile size")
- }
- if (!(size() > tile_size)) {
- return *this;
- }
- tiled_group sub_tile = tiled_group(tile_size);
- sub_tile.coalesced_info.tiled_info.is_tiled = true;
- return sub_tile;
- }
- protected:
- // Tags the base sub-object as a tiled group and records the tile size.
- explicit __CG_QUALIFIER__ tiled_group(unsigned int tileSize)
- : thread_group(internal::cg_tiled_group, tileSize) {
- coalesced_info.tiled_info.size = tileSize;
- coalesced_info.tiled_info.is_tiled = true;
- }
- public:
- // Number of threads in this tile.
- __CG_QUALIFIER__ unsigned int size() const {
- return coalesced_info.tiled_info.size;
- }
- // Rank of the calling thread within the tile: its workgroup rank masked with
- // (tile size - 1).
- __CG_QUALIFIER__ unsigned int thread_rank() const {
- const unsigned int rank_mask = coalesced_info.tiled_info.size - 1;
- return (internal::workgroup::thread_rank() & rank_mask);
- }
- __CG_QUALIFIER__ void sync() const {
- internal::tiled_group::sync();
- }
- };
- /** \brief The coalesced_group cooperative group type
- *
- * \details Represents a active thread group in a wavefront.
- * This group type also supports sub-wave level intrinsics.
- * @note This is implemented on Linux, under developement
- * on Windows.
- */
- class coalesced_group : public thread_group {
- private:
- friend __CG_QUALIFIER__ coalesced_group coalesced_threads();
- friend __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size);
- friend __CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size);
- // Builds the tile of `tile_size` members that contains the calling thread.
- // Returns an empty group (member mask 0) when `tile_size` is zero, larger than this
- // group, or not a power of two.
- __CG_QUALIFIER__ coalesced_group new_tiled_group(unsigned int tile_size) const {
- const bool pow2 = ((tile_size & (tile_size - 1)) == 0);
- if (!tile_size || (tile_size > size()) || !pow2) {
- return coalesced_group(0);
- }
- // If a tiled group is passed to be partitioned further into a coalesced_group,
- // prepare a mask for further partitioning it so that it stays coalesced.
- if (coalesced_info.tiled_info.is_tiled) {
- // Rank of the first member of the calling thread's tile
- unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
- // The last tile may hold fewer than tile_size members
- unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
- // `masklength` consecutive one bits, moved to the tile-aligned lane position
- lane_mask member_mask = static_cast<lane_mask>(-1) >> (__AMDGCN_WAVEFRONT_SIZE - masklength);
- member_mask <<= (__lane_id() & ~(tile_size - 1));
- coalesced_group coalesced_tile = coalesced_group(member_mask);
- coalesced_tile.coalesced_info.tiled_info.is_tiled = true;
- coalesced_tile.coalesced_info.tiled_info.meta_group_rank = thread_rank() / tile_size;
- coalesced_tile.coalesced_info.tiled_info.meta_group_size = size() / tile_size;
- return coalesced_tile;
- }
- // Here the parent coalesced_group is not partitioned: walk the lanes in order and
- // collect the members whose rank falls inside the calling thread's tile.
- lane_mask member_mask = 0;
- unsigned int tile_rank = 0;
- int lanes_to_skip = ((thread_rank()) / tile_size) * tile_size;
- for (unsigned int i = 0; i < __AMDGCN_WAVEFRONT_SIZE; i++) {
- // The shift is done in lane_mask width: a plain `1 << i` is an int shift, which is
- // undefined for i >= 32 and would never select the upper lanes of a 64-lane wavefront.
- const lane_mask active = coalesced_info.member_mask & (static_cast<lane_mask>(1) << i);
- // Make sure the lane is active
- if (active) {
- if (lanes_to_skip <= 0 && tile_rank < tile_size) {
- // Prepare a member_mask that is appropriate for a tile
- member_mask |= active;
- tile_rank++;
- }
- lanes_to_skip--;
- }
- }
- coalesced_group coalesced_tile = coalesced_group(member_mask);
- coalesced_tile.coalesced_info.tiled_info.meta_group_rank = thread_rank() / tile_size;
- coalesced_tile.coalesced_info.tiled_info.meta_group_size =
- (size() + tile_size - 1) / tile_size;
- return coalesced_tile;
- }
- protected:
- // Creates a non-partitioned coalesced group from the given member lane mask.
- explicit __CG_QUALIFIER__ coalesced_group(lane_mask member_mask)
- : thread_group(internal::cg_coalesced_group) {
- coalesced_info.member_mask = member_mask; // Which threads are active
- coalesced_info.size = __popcll(coalesced_info.member_mask); // How many threads are active
- coalesced_info.tiled_info.is_tiled = false; // Not a partitioned group
- coalesced_info.tiled_info.meta_group_rank = 0;
- coalesced_info.tiled_info.meta_group_size = 1;
- }
- public:
- // Number of member threads.
- __CG_QUALIFIER__ unsigned int size() const {
- return coalesced_info.size;
- }
- // Rank of the calling thread among the members, from the masked bit count of the
- // member mask.
- __CG_QUALIFIER__ unsigned int thread_rank() const {
- return internal::coalesced_group::masked_bit_count(coalesced_info.member_mask);
- }
- __CG_QUALIFIER__ void sync() const {
- internal::coalesced_group::sync();
- }
- // Index of this tile among the tiles created from the parent group (0 when the
- // group was not produced by partitioning).
- __CG_QUALIFIER__ unsigned int meta_group_rank() const {
- return coalesced_info.tiled_info.meta_group_rank;
- }
- // Number of tiles created when the parent group was partitioned (1 when the group
- // was not produced by partitioning).
- __CG_QUALIFIER__ unsigned int meta_group_size() const {
- return coalesced_info.tiled_info.meta_group_size;
- }
- // Returns `var` as held by the member whose rank is `srcRank` (taken modulo size()).
- // For a group that does not span the whole wavefront, the rank is translated to a
- // lane through __fns64/__fns32 on the member mask.
- template <class T>
- __CG_QUALIFIER__ T shfl(T var, int srcRank) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- srcRank = srcRank % static_cast<int>(size());
- int lane = (size() == __AMDGCN_WAVEFRONT_SIZE) ? srcRank
- : (__AMDGCN_WAVEFRONT_SIZE == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
- : __fns32(coalesced_info.member_mask, 0, (srcRank + 1));
- return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE);
- }
- // Returns `var` as held by the member `lane_delta` ranks after the calling thread;
- // when the lookup reports no such member (-1), the caller's own value is returned.
- template <class T>
- __CG_QUALIFIER__ T shfl_down(T var, unsigned int lane_delta) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- // Note: The cuda implementation appears to use the remainder of lane_delta
- // and WARP_SIZE as the shift value rather than lane_delta itself.
- // This is not described in the documentation and is not done here.
- if (size() == __AMDGCN_WAVEFRONT_SIZE) {
- return __shfl_down(var, lane_delta, __AMDGCN_WAVEFRONT_SIZE);
- }
- int lane;
- if (__AMDGCN_WAVEFRONT_SIZE == 64) {
- lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1);
- }
- else {
- lane = __fns32(coalesced_info.member_mask, __lane_id(), lane_delta + 1);
- }
- if (lane == -1) {
- lane = __lane_id();
- }
- return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE);
- }
- // Returns `var` as held by the member `lane_delta` ranks before the calling thread;
- // when the lookup reports no such member (-1), the caller's own value is returned.
- template <class T>
- __CG_QUALIFIER__ T shfl_up(T var, unsigned int lane_delta) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- // Note: The cuda implementation appears to use the remainder of lane_delta
- // and WARP_SIZE as the shift value rather than lane_delta itself.
- // This is not described in the documentation and is not done here.
- if (size() == __AMDGCN_WAVEFRONT_SIZE) {
- return __shfl_up(var, lane_delta, __AMDGCN_WAVEFRONT_SIZE);
- }
- // A plain `else` (as in shfl_down) guarantees `lane` is assigned on every path.
- int lane;
- if (__AMDGCN_WAVEFRONT_SIZE == 64) {
- lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
- }
- else {
- lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
- }
- if (lane == -1) {
- lane = __lane_id();
- }
- return __shfl(var, lane, __AMDGCN_WAVEFRONT_SIZE);
- }
- };
- /** \brief User exposed API to create coalesced groups.
- *
- * \details A collective operation that groups all active lanes into a new thread group.
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- __CG_QUALIFIER__ coalesced_group coalesced_threads() {
- // The member mask is the value returned by __builtin_amdgcn_read_exec().
- const auto exec_mask = __builtin_amdgcn_read_exec();
- return cooperative_groups::coalesced_group(exec_mask);
- }
- /**
- * Implementation of all publicly exposed base class APIs
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Dispatches on the stored group kind to the thread_rank() of the matching derived
- // type. An unknown kind trips __hip_assert and yields -1 converted to uint32_t.
- __CG_QUALIFIER__ uint32_t thread_group::thread_rank() const {
- switch (this->_type) {
- case internal::cg_multi_grid:
- return static_cast<const multi_grid_group*>(this)->thread_rank();
- case internal::cg_grid:
- return static_cast<const grid_group*>(this)->thread_rank();
- case internal::cg_workgroup:
- return static_cast<const thread_block*>(this)->thread_rank();
- case internal::cg_tiled_group:
- return static_cast<const tiled_group*>(this)->thread_rank();
- case internal::cg_coalesced_group:
- return static_cast<const coalesced_group*>(this)->thread_rank();
- default:
- __hip_assert(false && "invalid cooperative group type")
- return -1;
- }
- }
- /**
- * Implementation of all publicly exposed thread group API
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Dispatches on the stored group kind to the validity check of the matching derived
- // type. An unknown kind trips __hip_assert and is reported as invalid.
- __CG_QUALIFIER__ bool thread_group::is_valid() const {
- switch (this->_type) {
- case internal::cg_multi_grid: {
- return (static_cast<const multi_grid_group*>(this)->is_valid());
- }
- case internal::cg_grid: {
- return (static_cast<const grid_group*>(this)->is_valid());
- }
- case internal::cg_workgroup: {
- return (static_cast<const thread_block*>(this)->is_valid());
- }
- case internal::cg_tiled_group:
- case internal::cg_coalesced_group: {
- // tiled_group and coalesced_group declare no is_valid() of their own, so calling
- // it through the derived pointer resolves back to this very function and recurses
- // without end. Both kinds are formed from threads of a running wavefront, so they
- // are reported valid, as a workgroup is.
- return true;
- }
- default: {
- __hip_assert(false && "invalid cooperative group type")
- return false;
- }
- }
- }
- /**
- * Implementation of all publicly exposed thread group sync API
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Dispatches on the stored group kind to the sync() of the matching derived type.
- // An unknown kind trips __hip_assert and otherwise does nothing.
- __CG_QUALIFIER__ void thread_group::sync() const {
- switch (this->_type) {
- case internal::cg_multi_grid:
- static_cast<const multi_grid_group*>(this)->sync();
- return;
- case internal::cg_grid:
- static_cast<const grid_group*>(this)->sync();
- return;
- case internal::cg_workgroup:
- static_cast<const thread_block*>(this)->sync();
- return;
- case internal::cg_tiled_group:
- static_cast<const tiled_group*>(this)->sync();
- return;
- case internal::cg_coalesced_group:
- static_cast<const coalesced_group*>(this)->sync();
- return;
- default: {
- __hip_assert(false && "invalid cooperative group type")
- }
- }
- }
- /**
- * Implemenation of publicly exposed `wrapper` API on top of basic cooperative
- * group type APIs
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Free-function form of `g.size()` for any cooperative group type.
- template <class CGTy>
- __CG_QUALIFIER__ uint32_t group_size(CGTy const& g) {
- return g.size();
- }
- /**
- * Implemenation of publicly exposed `wrapper` API on top of basic cooperative
- * group type APIs
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Free-function form of `g.thread_rank()` for any cooperative group type.
- template <class CGTy>
- __CG_QUALIFIER__ uint32_t thread_rank(CGTy const& g) {
- const uint32_t rank = g.thread_rank();
- return rank;
- }
- /**
- * Implemenation of publicly exposed `wrapper` API on top of basic cooperative
- * group type APIs
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Free-function form of `g.is_valid()` for any cooperative group type.
- template <class CGTy>
- __CG_QUALIFIER__ bool is_valid(CGTy const& g) {
- return g.is_valid();
- }
- /**
- * Implemenation of publicly exposed `wrapper` API on top of basic cooperative
- * group type APIs
- * @note This function is implemented on Linux, under developement
- * on Windows.
- */
- // Free-function form of `g.sync()` for any cooperative group type.
- template <class CGTy>
- __CG_QUALIFIER__ void sync(CGTy const& g) {
- g.sync();
- }
- /**
- * template class tile_base
- * @note This class is implemented on Linux, under developement
- * on Windows.
- */
- template <unsigned int tileSize>
- class tile_base {
- protected:
- // Compile-time number of threads in the tile.
- _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize;
- public:
- // Rank of the thread within this tile: its workgroup rank masked with (numThreads - 1)
- _CG_STATIC_CONST_DECL_ unsigned int thread_rank() {
- return ((numThreads - 1) & internal::workgroup::thread_rank());
- }
- // Number of threads within this tile
- __CG_STATIC_QUALIFIER__ unsigned int size() {
- return numThreads;
- }
- };
- /**
- * Template class thread_block_tile_base.
- *
- * Extends tile_base with a static `sync()` and the shuffle operations
- * (`shfl`, `shfl_down`, `shfl_up`, `shfl_xor`). Every shuffle forwards to the
- * matching `__shfl*` function and passes `numThreads` as its third argument
- * (presumably the shuffle width -- confirm against the `__shfl*` declarations).
- *
- * The tile size is validated at compile time through `is_valid_tile_size`, and
- * each shuffle validates its value type through `is_valid_type`.
- *
- * @tparam size Number of threads in one tile.
- * @note This class is implemented on Linux, under development
- * on Windows.
- */
- template <unsigned int size> class thread_block_tile_base : public tile_base<size> {
- static_assert(is_valid_tile_size<size>::value,
- "Tile size is either not a power of 2 or greater than the wavefront size");
- using tile_base<size>::numThreads;
- public:
- __CG_STATIC_QUALIFIER__ void sync() {
- internal::tiled_group::sync(); // delegates to the shared tiled-group sync helper
- }
- template <class T> __CG_QUALIFIER__ T shfl(T var, int srcRank) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- return (__shfl(var, srcRank, numThreads)); // srcRank is forwarded unchanged
- }
- template <class T> __CG_QUALIFIER__ T shfl_down(T var, unsigned int lane_delta) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- return (__shfl_down(var, lane_delta, numThreads)); // lane_delta is forwarded unchanged
- }
- template <class T> __CG_QUALIFIER__ T shfl_up(T var, unsigned int lane_delta) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- return (__shfl_up(var, lane_delta, numThreads)); // lane_delta is forwarded unchanged
- }
- template <class T> __CG_QUALIFIER__ T shfl_xor(T var, unsigned int laneMask) const {
- static_assert(is_valid_type<T>::value, "Neither an integer or float type.");
- return (__shfl_xor(var, laneMask, numThreads)); // laneMask is forwarded unchanged
- }
- };
- /** \brief User exposed API that captures the state of the parent group pre-partition
- *
- * \details Both queries are static and are computed on demand from the static
- * `thread_rank()` / `size()` members of \p ParentCGTy; nothing is stored.
- *
- * \tparam tileSize Number of threads per tile.
- * \tparam ParentCGTy Parent group type; must expose static `thread_rank()` and `size()`.
- */
- template <unsigned int tileSize, typename ParentCGTy>
- class parent_group_info {
- public:
- // Returns the linear rank of the group within the set of tiles partitioned
- // from a parent group (bounded by meta_group_size)
- __CG_STATIC_QUALIFIER__ unsigned int meta_group_rank() {
- return ParentCGTy::thread_rank() / tileSize; // integer division: index of the tile holding this thread
- }
- // Returns the number of groups created when the parent group was partitioned.
- __CG_STATIC_QUALIFIER__ unsigned int meta_group_size() {
- return (ParentCGTy::size() + tileSize - 1) / tileSize; // rounds up, so a partial last tile is counted
- }
- };
- /** \brief Group type - thread_block_tile
- *
- * \details Represents one tile of thread group. This primary template is used
- * when the parent group type is known at compile time: the meta group
- * rank/size queries come from parent_group_info and are not stored in
- * the object. The protected constructor records the tile size and marks
- * the group as tiled in `coalesced_info.tiled_info`.
- *
- * \tparam tileSize Number of threads per tile.
- * \tparam ParentCGTy Type of the group that was partitioned.
- * @note This type is implemented on Linux, under development
- * on Windows.
- */
- template <unsigned int tileSize, class ParentCGTy>
- class thread_block_tile_type : public thread_block_tile_base<tileSize>,
- public tiled_group,
- public parent_group_info<tileSize, ParentCGTy> {
- _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize;
- protected:
- __CG_QUALIFIER__ thread_block_tile_type() : tiled_group(numThreads) {
- coalesced_info.tiled_info.size = numThreads;
- coalesced_info.tiled_info.is_tiled = true;
- }
- };
- /* Partial template specialization for a parent type erased to `void`.
- * No parent type is available here, so the meta group rank and size are
- * passed to the constructor, stored in `coalesced_info.tiled_info`, and
- * returned by the non-static meta_group_rank() / meta_group_size() accessors.
- * size(), sync() and thread_rank() are re-exported from thread_block_tile_base.
- */
- template <unsigned int tileSize>
- class thread_block_tile_type<tileSize, void> : public thread_block_tile_base<tileSize>,
- public tiled_group
- {
- _CG_STATIC_CONST_DECL_ unsigned int numThreads = tileSize;
- typedef thread_block_tile_base<numThreads> tbtBase;
- protected:
- __CG_QUALIFIER__ thread_block_tile_type(unsigned int meta_group_rank, unsigned int meta_group_size)
- : tiled_group(numThreads) {
- coalesced_info.tiled_info.size = numThreads;
- coalesced_info.tiled_info.is_tiled = true;
- coalesced_info.tiled_info.meta_group_rank = meta_group_rank;
- coalesced_info.tiled_info.meta_group_size = meta_group_size;
- }
- public:
- using tbtBase::size;
- using tbtBase::sync;
- using tbtBase::thread_rank;
- __CG_QUALIFIER__ unsigned int meta_group_rank() const {
- return coalesced_info.tiled_info.meta_group_rank; // value captured at construction
- }
- __CG_QUALIFIER__ unsigned int meta_group_size() const {
- return coalesced_info.tiled_info.meta_group_size; // value captured at construction
- }
- // end of operative group
- /**
- * @}
- */
- };
- /** \brief User exposed API to partition groups.
- *
- * \details Collective operation that splits the parent group into a one-dimensional,
- * row-major tiling of subgroups. The parent's runtime type tag selects the
- * concrete group type that performs the partitioning; any tag other than
- * tiled or coalesced is handled as a thread_block.
- *
- * \param parent Group to partition.
- * \param tile_size Requested number of threads per tile.
- * \return The group produced by the selected type's new_tiled_group(), as a thread_group.
- */
- __CG_QUALIFIER__ thread_group tiled_partition(const thread_group& parent, unsigned int tile_size) {
- switch (parent.cg_type()) {
- case internal::cg_tiled_group:
- return static_cast<const tiled_group&>(parent).new_tiled_group(tile_size);
- case internal::cg_coalesced_group:
- return static_cast<const coalesced_group&>(parent).new_tiled_group(tile_size);
- default:
- // Every remaining group type is treated as a thread block.
- return static_cast<const thread_block&>(parent).new_tiled_group(tile_size);
- }
- }
- // Thread block type overload: forwards to thread_block::new_tiled_group and returns the result as a thread_group
- __CG_QUALIFIER__ thread_group tiled_partition(const thread_block& parent, unsigned int tile_size) {
- return (parent.new_tiled_group(tile_size));
- }
- __CG_QUALIFIER__ tiled_group tiled_partition(const tiled_group& parent, unsigned int tile_size) { // tiled group overload: the result stays a tiled_group
- return (parent.new_tiled_group(tile_size));
- }
- // Coalesced group overload: if a coalesced group is passed to be partitioned, it should remain coalesced
- __CG_QUALIFIER__ coalesced_group tiled_partition(const coalesced_group& parent, unsigned int tile_size) {
- return (parent.new_tiled_group(tile_size));
- }
- template <unsigned int size, class ParentCGTy> class thread_block_tile; // forward declaration; defined after namespace impl
- namespace impl {
- template <unsigned int size, class ParentCGTy> class thread_block_tile_internal; // forward declaration of the class defined right below
- template <unsigned int size, class ParentCGTy>
- class thread_block_tile_internal : public thread_block_tile_type<size, ParentCGTy> { // internal layer that only adds the constructors used by thread_block_tile
- protected:
- template <unsigned int tbtSize, class tbtParentT>
- __CG_QUALIFIER__ thread_block_tile_internal(
- const thread_block_tile_internal<tbtSize, tbtParentT>& g) // build from another tile, carrying over its meta group rank/size
- : thread_block_tile_type<size, ParentCGTy>(g.meta_group_rank(), g.meta_group_size()) {} // two-argument base constructor: only the <size, void> specialization declares it in this file
- __CG_QUALIFIER__ thread_block_tile_internal(const thread_block& g) // build from a thread_block; `g` itself is not read
- : thread_block_tile_type<size, ParentCGTy>() {} // default base constructor: only the primary template declares it in this file
- };
- } // namespace impl
- template <unsigned int size, class ParentCGTy>
- class thread_block_tile : public impl::thread_block_tile_internal<size, ParentCGTy> { // tile whose parent group type is known at compile time
- protected:
- __CG_QUALIFIER__ thread_block_tile(const ParentCGTy& g) // protected: reachable from derived types such as impl::tiled_partition_internal
- : impl::thread_block_tile_internal<size, ParentCGTy>(g) {}
- public:
- __CG_QUALIFIER__ operator thread_block_tile<size, void>() const { // conversion to the parent-erased tile type
- return thread_block_tile<size, void>(*this);
- }
- };
- template <unsigned int size>
- class thread_block_tile<size, void> : public impl::thread_block_tile_internal<size, void> { // specialization with the parent group type erased
- template <unsigned int, class ParentCGTy> friend class thread_block_tile; // every thread_block_tile instantiation is a friend of this one
- protected: // intentionally empty section; the only constructor below is public
- public:
- template <class ParentCGTy>
- __CG_QUALIFIER__ thread_block_tile(const thread_block_tile<size, ParentCGTy>& g) // converting constructor from a tile of the same size with any parent type
- : impl::thread_block_tile_internal<size, void>(g) {}
- };
- template <unsigned int size, class ParentCGTy = void> class thread_block_tile; // redeclaration that makes `void` the default parent type
- namespace impl {
- template <unsigned int size, class ParentCGTy> struct tiled_partition_internal; // primary template is only declared; parent types need a specialization
- template <unsigned int size>
- struct tiled_partition_internal<size, thread_block> : public thread_block_tile<size, thread_block> { // specialization for thread_block parents
- __CG_QUALIFIER__ tiled_partition_internal(const thread_block& g) // public entry to the protected thread_block_tile constructor
- : thread_block_tile<size, thread_block>(g) {}
- };
- } // namespace impl
- /** \brief User exposed API to partition groups.
- *
- * \details This constructs a templated class derived from thread_group.
- * The template defines tile size of the new thread group at compile time.
- * The tile is built through impl::tiled_partition_internal, so only
- * parent types with a specialization of that struct are accepted (in
- * this part of the file: thread_block).
- *
- * \tparam size Tile size; checked at compile time with is_valid_tile_size.
- * \tparam ParentCGTy Type of the group being partitioned.
- * \param g Parent group to partition.
- * \return The tile, as thread_block_tile<size, ParentCGTy>.
- */
- template <unsigned int size, class ParentCGTy>
- __CG_QUALIFIER__ thread_block_tile<size, ParentCGTy> tiled_partition(const ParentCGTy& g) {
- static_assert(is_valid_tile_size<size>::value,
- "Tiled partition with size > wavefront size. Currently not supported ");
- return impl::tiled_partition_internal<size, ParentCGTy>(g); // converted to its thread_block_tile base on return
- }
- } // namespace cooperative_groups
- #endif // __cplusplus
- #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H
- /*
- Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #ifdef __cplusplus
- /**
- * @brief Unsafe floating point rmw atomic add.
- *
- * Performs a relaxed read-modify-write floating point atomic add with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated to have the original value plus \p value
- *
- * @note This operation currently only performs different operations for
- * the gfx90a target. Other devices continue to use safe atomics.
- *
- * It can be used to generate code that uses fast hardware floating point atomic
- * operations which may handle rounding and subnormal values differently than
- * non-atomic floating point operations.
- *
- * On gfx90a, when all required builtins are available, the address is
- * classified at run time: an address reported as shared uses the DS atomic
- * add builtin, an address reported as private is updated with a plain
- * (non-atomic) load/add/store, and any other address uses the global atomic
- * add builtin. All other configurations use a relaxed fetch-add.
- *
- * The operation is not always safe and can have undefined behavior unless
- * following condition are met:
- *
- * - \p addr is at least 4 bytes aligned
- * - If \p addr is a global segment address, it is in a coarse grain allocation.
- * Passing in global segment addresses in fine grain allocations will result in
- * undefined behavior and is not supported.
- *
- * @param [in,out] addr Pointer to value to be incremented by \p value.
- * @param [in] value Value by which \p addr is to be incremented.
- * @return Original value contained in \p addr.
- */
- __device__ inline float unsafeAtomicAdd(float* addr, float value) {
- #if defined(__gfx90a__) && \
- __has_builtin(__builtin_amdgcn_is_shared) && \
- __has_builtin(__builtin_amdgcn_is_private) && \
- __has_builtin(__builtin_amdgcn_ds_atomic_fadd_f32) && \
- __has_builtin(__builtin_amdgcn_global_atomic_fadd_f32)
- if (__builtin_amdgcn_is_shared(
- (const __attribute__((address_space(0))) void*)addr))
- return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value); // shared address: DS atomic add
- else if (__builtin_amdgcn_is_private(
- (const __attribute__((address_space(0))) void*)addr)) {
- float temp = *addr; // private address: plain read-modify-write, no atomic instruction
- *addr = temp + value;
- return temp;
- }
- else
- return __builtin_amdgcn_global_atomic_fadd_f32(addr, value); // neither shared nor private: global atomic add
- #elif __has_builtin(__hip_atomic_fetch_add)
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
- return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED); // last resort: this builtin takes no memory-scope argument
- #endif
- }
- /**
- * @brief Unsafe floating point rmw atomic max.
- *
- * Performs a relaxed read-modify-write floating point atomic max with
- * device memory scope. The original value at \p addr is returned and
- * the value at \p addr is replaced by \p val if greater.
- *
- * Implemented as a compare-and-swap loop: the current value is loaded once,
- * and while it compares less than \p val a compare-exchange tries to store
- * \p val. The loop relies on the compare-exchange writing the observed
- * contents back into the local copy when it fails. If the current value is
- * not less than \p val, nothing is stored.
- *
- * @note This operation is currently identical to that performed by
- * atomicMax and is included for completeness.
- *
- * @param [in,out] addr Pointer to value to be updated
- * @param [in] val Value used to update the value at \p addr.
- * @return Original value contained in \p addr.
- */
- __device__ inline float unsafeAtomicMax(float* addr, float val) {
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value < val) { // stop once stored, or once memory already holds a value not less than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned int *uaddr = (unsigned int *)addr; // fallback works on the 32-bit pattern of the float
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __uint_as_float(value) < val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __uint_as_float(value);
- #endif
- }
- /**
- * @brief Unsafe floating point rmw atomic min.
- *
- * Performs a relaxed read-modify-write floating point atomic min with
- * device memory scope. The original value at \p addr is returned and
- * the value at \p addr is replaced by \p val if lesser.
- *
- * Implemented as a compare-and-swap loop: the current value is loaded once,
- * and while it compares greater than \p val a compare-exchange tries to store
- * \p val. The loop relies on the compare-exchange writing the observed
- * contents back into the local copy when it fails. If the current value is
- * not greater than \p val, nothing is stored.
- *
- * @note This operation is currently identical to that performed by
- * atomicMin and is included for completeness.
- *
- * @param [in,out] addr Pointer to value to be updated
- * @param [in] val Value used to update the value at \p addr.
- * @return Original value contained in \p addr.
- */
- __device__ inline float unsafeAtomicMin(float* addr, float val) {
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value > val) { // stop once stored, or once memory already holds a value not greater than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned int *uaddr = (unsigned int *)addr; // fallback works on the 32-bit pattern of the float
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __uint_as_float(value) > val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __uint_as_float(value);
- #endif
- }
- /**
- * @brief Unsafe double precision rmw atomic add.
- *
- * Performs a relaxed read-modify-write double precision atomic add with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated to have the original value plus \p value
- *
- * @note This operation currently only performs different operations for
- * the gfx90a target. Other devices continue to use safe atomics.
- *
- * It can be used to generate code that uses fast hardware floating point atomic
- * operations which may handle rounding and subnormal values differently than
- * non-atomic floating point operations.
- *
- * The operation is not always safe and can have undefined behavior unless
- * following condition are met:
- *
- * - \p addr is at least 8 byte aligned
- * - If \p addr is a global segment address, it is in a coarse grain allocation.
- * Passing in global segment addresses in fine grain allocations will result in
- * undefined behavior and is not supported.
- *
- * @param [in,out] addr Pointer to value to be updated.
- * @param [in] value Value by which \p addr is to be incremented.
- * @return Original value contained in \p addr.
- */
- __device__ inline double unsafeAtomicAdd(double* addr, double value) {
- #if defined(__gfx90a__) && __has_builtin(__builtin_amdgcn_flat_atomic_fadd_f64)
- return __builtin_amdgcn_flat_atomic_fadd_f64(addr, value);
- #elif __has_builtin(__hip_atomic_fetch_add)
- // __hip_atomic_fetch_add is a compiler builtin, not a macro, so it must be
- // detected with __has_builtin; `defined(...)` is never true for it and would
- // leave this agent-scope path unreachable.
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
- // Last resort: this builtin takes no memory-scope argument.
- return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
- #endif
- }
- /**
- * @brief Unsafe double precision rmw atomic max.
- *
- * Performs a relaxed read-modify-write double precision atomic max with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated with \p val if greater.
- *
- * @note This operation currently only performs different operations for
- * the gfx90a, gfx940, gfx941 and gfx942 targets, and only when the
- * __builtin_amdgcn_flat_atomic_fmax_f64 builtin is available. Other
- * configurations use a compare-and-swap loop.
- *
- * It can be used to generate code that uses fast hardware floating point atomic
- * operations which may handle rounding and subnormal values differently than
- * non-atomic floating point operations.
- *
- * The operation is not always safe and can have undefined behavior unless
- * following condition are met:
- *
- * - \p addr is at least 8 byte aligned
- * - If \p addr is a global segment address, it is in a coarse grain allocation.
- * Passing in global segment addresses in fine grain allocations will result in
- * undefined behavior and is not supported.
- *
- * @param [in,out] addr Pointer to value to be updated.
- * @param [in] val Value used to update the contents at \p addr
- * @return Original value contained at \p addr.
- */
- __device__ inline double unsafeAtomicMax(double* addr, double val) {
- #if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
- __has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64)
- return __builtin_amdgcn_flat_atomic_fmax_f64(addr, val);
- #else
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value < val) { // stop once stored, or once memory already holds a value not less than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned long long *uaddr = (unsigned long long *)addr; // fallback works on the 64-bit pattern of the double
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __longlong_as_double(value) < val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __longlong_as_double(value);
- #endif
- #endif
- }
- /**
- * @brief Unsafe double precision rmw atomic min.
- *
- * Performs a relaxed read-modify-write double precision atomic min with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated with \p val if lesser.
- *
- * @note This operation currently only performs different operations for
- * the gfx90a, gfx940, gfx941 and gfx942 targets, and only when the
- * __builtin_amdgcn_flat_atomic_fmin_f64 builtin is available. Other
- * configurations use a compare-and-swap loop.
- *
- * It can be used to generate code that uses fast hardware floating point atomic
- * operations which may handle rounding and subnormal values differently than
- * non-atomic floating point operations.
- *
- * The operation is not always safe and can have undefined behavior unless
- * following condition are met:
- *
- * - \p addr is at least 8 byte aligned
- * - If \p addr is a global segment address, it is in a coarse grain allocation.
- * Passing in global segment addresses in fine grain allocations will result in
- * undefined behavior and is not supported.
- *
- * @param [in,out] addr Pointer to value to be updated.
- * @param [in] val Value used to update the contents at \p addr
- * @return Original value contained at \p addr.
- */
- __device__ inline double unsafeAtomicMin(double* addr, double val) {
- #if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
- __has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64)
- return __builtin_amdgcn_flat_atomic_fmin_f64(addr, val);
- #else
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value > val) { // stop once stored, or once memory already holds a value not greater than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned long long *uaddr = (unsigned long long *)addr; // fallback works on the 64-bit pattern of the double
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __longlong_as_double(value) > val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __longlong_as_double(value);
- #endif
- #endif
- }
- /**
- * @brief Safe floating point rmw atomic add.
- *
- * Performs a relaxed read-modify-write floating point atomic add with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated to have the original value plus \p value
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be incremented by \p value.
- * @param [in] value Value by which \p addr is to be incremented.
- * @return Original value contained in \p addr.
- */
- __device__ inline float safeAtomicAdd(float* addr, float value) {
- #if defined(__gfx908__) || defined(__gfx941__) \
- || ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__)) \
- && !__has_builtin(__hip_atomic_fetch_add))
- // On gfx908, we can generate unsafe FP32 atomic add that does not follow all
- // IEEE rules when -munsafe-fp-atomics is passed. Do a CAS loop emulation instead.
- // On gfx941, we can generate unsafe FP32 atomic add that may not always happen atomically,
- // so we need to force a CAS loop emulation to ensure safety.
- // On gfx90a, gfx940 and gfx942 if we do not have the __hip_atomic_fetch_add builtin, we
- // need to force a CAS loop here.
- float old_val;
- #if __has_builtin(__hip_atomic_load)
- old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else // !__has_builtin(__hip_atomic_load)
- old_val = __uint_as_float(__atomic_load_n(reinterpret_cast<unsigned int*>(addr), __ATOMIC_RELAXED));
- #endif // __has_builtin(__hip_atomic_load)
- float expected, temp;
- do {
- temp = expected = old_val;
- #if __has_builtin(__hip_atomic_compare_exchange_strong)
- __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED,
- __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else // !__has_builtin(__hip_atomic_compare_exchange_strong)
- // __atomic_compare_exchange_n only accepts integer or pointer operands, so
- // the exchange is performed on the 32-bit pattern of the float.
- unsigned int expected_bits = __float_as_uint(expected);
- __atomic_compare_exchange_n(reinterpret_cast<unsigned int*>(addr), &expected_bits,
- __float_as_uint(old_val + value), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- expected = __uint_as_float(expected_bits);
- #endif // __has_builtin(__hip_atomic_compare_exchange_strong)
- old_val = expected;
- // Compared as bit patterns, so a NaN in memory cannot keep the loop spinning.
- } while (__float_as_uint(temp) != __float_as_uint(old_val));
- return old_val;
- #elif defined(__gfx90a__)
- // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope
- // atomics will produce safe CAS loops, but are otherwise not different than
- // agent-scope atomics. This logic is only applicable for gfx90a, and should
- // not be assumed on other architectures.
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #elif __has_builtin(__hip_atomic_fetch_add)
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
- return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
- #endif
- }
- /**
- * @brief Safe floating point rmw atomic max.
- *
- * Performs a relaxed read-modify-write floating point atomic max with
- * device memory scope. The original value at \p addr is returned and
- * the value at \p addr is replaced by \p val if greater.
- *
- * Implemented as a compare-and-swap loop: the current value is loaded once,
- * and while it compares less than \p val a compare-exchange tries to store
- * \p val. The loop relies on the compare-exchange writing the observed
- * contents back into the local copy when it fails. If the current value is
- * not less than \p val, nothing is stored.
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be updated
- * @param [in] val Value used to update the value at \p addr.
- * @return Original value contained in \p addr.
- */
- __device__ inline float safeAtomicMax(float* addr, float val) {
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value < val) { // stop once stored, or once memory already holds a value not less than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned int *uaddr = (unsigned int *)addr; // fallback works on the 32-bit pattern of the float
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __uint_as_float(value) < val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __uint_as_float(value);
- #endif
- }
- /**
- * @brief Safe floating point rmw atomic min.
- *
- * Performs a relaxed read-modify-write floating point atomic min with
- * device memory scope. The original value at \p addr is returned and
- * the value at \p addr is replaced by \p val if lesser.
- *
- * Implemented as a compare-and-swap loop: the current value is loaded once,
- * and while it compares greater than \p val a compare-exchange tries to store
- * \p val. The loop relies on the compare-exchange writing the observed
- * contents back into the local copy when it fails. If the current value is
- * not greater than \p val, nothing is stored.
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be updated
- * @param [in] val Value used to update the value at \p addr.
- * @return Original value contained in \p addr.
- */
- __device__ inline float safeAtomicMin(float* addr, float val) {
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value > val) { // stop once stored, or once memory already holds a value not greater than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned int *uaddr = (unsigned int *)addr; // fallback works on the 32-bit pattern of the float
- unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __uint_as_float(value) > val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __uint_as_float(value);
- #endif
- }
- /**
- * @brief Safe double precision rmw atomic add.
- *
- * Performs a relaxed read-modify-write double precision atomic add with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated to have the original value plus \p value
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be incremented by \p value.
- * @param [in] value Value by which \p addr is to be incremented.
- * @return Original value contained in \p addr.
- */
- __device__ inline double safeAtomicAdd(double* addr, double value) {
- #if defined(__gfx90a__) && __has_builtin(__hip_atomic_fetch_add)
- // On gfx90a, with the __hip_atomic_fetch_add builtin, relaxed system-scope
- // atomics will produce safe CAS loops, but are otherwise not different than
- // agent-scope atomics. This logic is only applicable for gfx90a, and should
- // not be assumed on other architectures.
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #elif defined(__gfx90a__)
- // On gfx90a, if we do not have the __hip_atomic_fetch_add builtin, we need to
- // force a CAS loop here.
- double old_val;
- #if __has_builtin(__hip_atomic_load)
- old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else // !__has_builtin(__hip_atomic_load)
- old_val = __longlong_as_double(__atomic_load_n(reinterpret_cast<unsigned long long*>(addr), __ATOMIC_RELAXED));
- #endif // __has_builtin(__hip_atomic_load)
- double expected, temp;
- do {
- temp = expected = old_val;
- #if __has_builtin(__hip_atomic_compare_exchange_strong)
- __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED,
- __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else // !__has_builtin(__hip_atomic_compare_exchange_strong)
- // __atomic_compare_exchange_n only accepts integer or pointer operands, so
- // the exchange is performed on the 64-bit pattern of the double.
- unsigned long long expected_bits = __double_as_longlong(expected);
- __atomic_compare_exchange_n(reinterpret_cast<unsigned long long*>(addr), &expected_bits,
- __double_as_longlong(old_val + value), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- expected = __longlong_as_double(expected_bits);
- #endif // __has_builtin(__hip_atomic_compare_exchange_strong)
- old_val = expected;
- // Compared as bit patterns, so a NaN in memory cannot keep the loop spinning.
- } while (__double_as_longlong(temp) != __double_as_longlong(old_val));
- return old_val;
- #else // !defined(__gfx90a__)
- #if __has_builtin(__hip_atomic_fetch_add)
- return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else // !__has_builtin(__hip_atomic_fetch_add)
- return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
- #endif // __has_builtin(__hip_atomic_fetch_add)
- #endif
- }
- /**
- * @brief Safe double precision rmw atomic max.
- *
- * Performs a relaxed read-modify-write double precision atomic max with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated with \p val if greater.
- *
- * When __builtin_amdgcn_is_private is available and reports \p addr as a
- * private address, the update is a plain (non-atomic) load, __builtin_fmax
- * and store. Every other address goes through a compare-and-swap loop that
- * stores \p val only while the current value compares less than \p val.
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be updated.
- * @param [in] val Value used to update the contents at \p addr
- * @return Original value contained at \p addr.
- */
- __device__ inline double safeAtomicMax(double* addr, double val) {
- #if __has_builtin(__builtin_amdgcn_is_private)
- if (__builtin_amdgcn_is_private(
- (const __attribute__((address_space(0))) void*)addr)) {
- double old = *addr; // private address: plain read-modify-write, no atomic instruction
- *addr = __builtin_fmax(old, val);
- return old;
- } else { // closed by the matching `#if` block at the end of the function
- #endif
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value < val) { // stop once stored, or once memory already holds a value not less than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned long long *uaddr = (unsigned long long *)addr; // fallback works on the 64-bit pattern of the double
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __longlong_as_double(value) < val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __longlong_as_double(value);
- #endif
- #if __has_builtin(__builtin_amdgcn_is_private)
- }
- #endif
- }
- /**
- * @brief Safe double precision rmw atomic min.
- *
- * Performs a relaxed read-modify-write double precision atomic min with
- * device memory scope. Original value at \p addr is returned and
- * the value of \p addr is updated with \p val if lesser.
- *
- * When __builtin_amdgcn_is_private is available and reports \p addr as a
- * private address, the update is a plain (non-atomic) load, __builtin_fmin
- * and store. Every other address goes through a compare-and-swap loop that
- * stores \p val only while the current value compares greater than \p val.
- *
- * @note This operation ensures that, on all targets, we produce safe atomics.
- * This will be the case even when -munsafe-fp-atomics is passed into the compiler.
- *
- * @param [in,out] addr Pointer to value to be updated.
- * @param [in] val Value used to update the contents at \p addr
- * @return Original value contained at \p addr.
- */
- __device__ inline double safeAtomicMin(double* addr, double val) {
- #if __has_builtin(__builtin_amdgcn_is_private)
- if (__builtin_amdgcn_is_private(
- (const __attribute__((address_space(0))) void*)addr)) {
- double old = *addr; // private address: plain read-modify-write, no atomic instruction
- *addr = __builtin_fmin(old, val);
- return old;
- } else { // closed by the matching `#if` block at the end of the function
- #endif
- #if __has_builtin(__hip_atomic_load) && \
- __has_builtin(__hip_atomic_compare_exchange_strong)
- double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- bool done = false;
- while (!done && value > val) { // stop once stored, or once memory already holds a value not greater than val
- done = __hip_atomic_compare_exchange_strong(addr, &value, val,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- return value;
- #else
- unsigned long long *uaddr = (unsigned long long *)addr; // fallback works on the 64-bit pattern of the double
- unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
- bool done = false;
- while (!done && __longlong_as_double(value) > val) {
- done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
- __ATOMIC_RELAXED, __ATOMIC_RELAXED);
- }
- return __longlong_as_double(value);
- #endif
- #if __has_builtin(__builtin_amdgcn_is_private)
- }
- #endif
- }
- #endif
- /*
- Copyright (c) 2015 - Present Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if !defined(__HIPCC_RTC__)
- #include "amd_device_functions.h"
- #endif
- #if __has_builtin(__hip_atomic_compare_exchange_strong)
- // Compile-time type selector: Cond_t<B, T, F>::type is T when B is true and F when B is false.
- // Only the two specializations are defined; the primary template stays incomplete.
- template <bool B, typename T, typename F>
- struct Cond_t;
- template <typename T, typename F>
- struct Cond_t<true, T, F> {
-   typedef T type;
- };
- template <typename T, typename F>
- struct Cond_t<false, T, F> {
-   typedef F type;
- };
- #if !__HIP_DEVICE_COMPILE__
- // Fallback values for the HIP memory-scope macros when not compiling for the device.
- // Each macro is defined only if it is still missing, so a value pre-defined by the
- // compiler always takes precedence and is never redefined here.
- // TODO: Remove this after the compiler pre-defines the following macros.
- #ifndef __HIP_MEMORY_SCOPE_SINGLETHREAD
- #define __HIP_MEMORY_SCOPE_SINGLETHREAD 1
- #endif
- #ifndef __HIP_MEMORY_SCOPE_WAVEFRONT
- #define __HIP_MEMORY_SCOPE_WAVEFRONT 2
- #endif
- #ifndef __HIP_MEMORY_SCOPE_WORKGROUP
- #define __HIP_MEMORY_SCOPE_WORKGROUP 3
- #endif
- #ifndef __HIP_MEMORY_SCOPE_AGENT
- #define __HIP_MEMORY_SCOPE_AGENT 4
- #endif
- #ifndef __HIP_MEMORY_SCOPE_SYSTEM
- #define __HIP_MEMORY_SCOPE_SYSTEM 5
- #endif
- #endif  // !__HIP_DEVICE_COMPILE__
- #if !defined(__HIPCC_RTC__)
- #include "amd_hip_unsafe_atomics.h"
- #endif
- // Atomic expanders
- // Emulates an atomic read-modify-write with a compare-exchange loop.
- //   p  : location to update
- //   x  : operand forwarded to `op`
- //   op : callable invoked as op(T& current, T x); it rewrites `current` in place
- //   f  : callable invoked (and its result returned) when `p` is reported as shared memory
- // Returns the contents of `*p` that the successful compare-exchange replaced.
- template<
-     int mem_order = __ATOMIC_SEQ_CST,
-     int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
-     typename T,
-     typename Op,
-     typename F>
- inline __attribute__((always_inline, device))
- T hip_cas_expander(T* p, T x, Op op, F f) noexcept
- {
-   using FP = __attribute__((address_space(0))) const void*;
-   __device__
-   extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
-   if (is_shared_workaround((FP)p)) {
-     return f();
-   }
-   // Unsigned integer type with the same size as T (4 bytes -> unsigned int, else 64-bit).
-   using U = typename Cond_t<
-       sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
-   U* const word = reinterpret_cast<U*>(p);
-   U expected{__hip_atomic_load(word, mem_order, mem_scope)};
-   U desired;
-   bool swapped;
-   do {
-     // Recompute the desired value from the most recently observed contents.
-     desired = expected;
-     op(reinterpret_cast<T&>(desired), x);
-     swapped = __hip_atomic_compare_exchange_strong(word, &expected, desired, mem_order,
-                                                    mem_order, mem_scope);
-   } while (!swapped);
-   return reinterpret_cast<const T&>(expected);
- }
- // Emulates an atomic min/max with a compare-exchange loop.
- //   p   : location to update
- //   x   : candidate value
- //   cmp : callable invoked as cmp(x, current); `x` is stored only while it returns true
- //   f   : callable invoked (and its result returned) when `p` is reported as shared memory
- // Returns the last value observed at `*p` (the replaced value when a swap happened).
- template<
-     int mem_order = __ATOMIC_SEQ_CST,
-     int mem_scope = __HIP_MEMORY_SCOPE_SYSTEM,
-     typename T,
-     typename Cmp,
-     typename F>
- inline __attribute__((always_inline, device))
- T hip_cas_extrema_expander(T* p, T x, Cmp cmp, F f) noexcept
- {
-   using FP = __attribute__((address_space(0))) const void*;
-   __device__
-   extern bool is_shared_workaround(FP) asm("llvm.amdgcn.is.shared");
-   if (is_shared_workaround((FP)p)) {
-     return f();
-   }
-   // Unsigned integer type with the same size as T (4 bytes -> unsigned int, else 64-bit).
-   using U = typename Cond_t<
-       sizeof(T) == sizeof(unsigned int), unsigned int, unsigned long long>::type;
-   U* const word = reinterpret_cast<U*>(p);
-   U observed{__hip_atomic_load(word, mem_order, mem_scope)};
-   for (;;) {
-     // Stop once the stored value no longer loses against the candidate...
-     if (!cmp(x, reinterpret_cast<const T&>(observed))) {
-       break;
-     }
-     // ...or once the candidate has been written successfully.
-     if (__hip_atomic_compare_exchange_strong(word, &observed, x, mem_order, mem_order,
-                                              mem_scope)) {
-       break;
-     }
-   }
-   return reinterpret_cast<const T&>(observed);
- }
- // int compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline int atomicCAS(int* address, int compare, int val) {
-   int* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // int compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline int atomicCAS_system(int* address, int compare, int val) {
-   int* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // unsigned int compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned int atomicCAS(unsigned int* address, unsigned int compare,
-                                          unsigned int val) {
-   unsigned int* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // unsigned int compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned int atomicCAS_system(unsigned int* address, unsigned int compare,
-                                                 unsigned int val) {
-   unsigned int* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // unsigned long compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned long atomicCAS(unsigned long* address, unsigned long compare,
-                                           unsigned long val) {
-   unsigned long* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // unsigned long compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned long atomicCAS_system(unsigned long* address, unsigned long compare,
-                                                  unsigned long val) {
-   unsigned long* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // unsigned long long compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned long long atomicCAS(unsigned long long* address,
-                                                unsigned long long compare,
-                                                unsigned long long val) {
-   unsigned long long* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // unsigned long long compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline unsigned long long atomicCAS_system(unsigned long long* address,
-                                                       unsigned long long compare,
-                                                       unsigned long long val) {
-   unsigned long long* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // float compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline float atomicCAS(float* address, float compare, float val) {
-   float* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // float compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline float atomicCAS_system(float* address, float compare, float val) {
-   float* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // double compare-and-swap, relaxed ordering, agent scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline double atomicCAS(double* address, double compare, double val) {
-   double* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return *expected;
- }
- // double compare-and-swap, relaxed ordering, system scope.
- // Returns `compare` as left by the builtin (it receives the observed value on failure).
- __device__ inline double atomicCAS_system(double* address, double compare, double val) {
-   double* const expected = &compare;
-   __hip_atomic_compare_exchange_strong(address, expected, val, __ATOMIC_RELAXED,
-                                        __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return *expected;
- }
- // Relaxed, agent-scope atomic add on an int; returns the result of the fetch-add builtin.
- __device__ inline int atomicAdd(int* address, int val) {
-   const int previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic add on an int; returns the result of the fetch-add builtin.
- __device__ inline int atomicAdd_system(int* address, int val) {
-   const int previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic add on an unsigned int; returns the fetch-add result.
- __device__ inline unsigned int atomicAdd(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic add on an unsigned int; returns the fetch-add result.
- __device__ inline unsigned int atomicAdd_system(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic add on an unsigned long; returns the fetch-add result.
- __device__ inline unsigned long atomicAdd(unsigned long* address, unsigned long val) {
-   const unsigned long previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic add on an unsigned long; returns the fetch-add result.
- __device__ inline unsigned long atomicAdd_system(unsigned long* address, unsigned long val) {
-   const unsigned long previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic add on an unsigned long long; returns the fetch-add result.
- __device__ inline unsigned long long atomicAdd(unsigned long long* address,
-                                                unsigned long long val) {
-   const unsigned long long previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic add on an unsigned long long; returns the fetch-add result.
- __device__ inline unsigned long long atomicAdd_system(unsigned long long* address,
-                                                       unsigned long long val) {
-   const unsigned long long previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Agent-scope atomic add on a float.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicAdd;
- // otherwise the relaxed fetch-add builtin is used.
- __device__ inline float atomicAdd(float* address, float val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicAdd(address, val);
- #else
-   const float previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- #endif
- }
- // Relaxed, system-scope atomic add on a float; returns the fetch-add result.
- __device__ inline float atomicAdd_system(float* address, float val) {
-   const float previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Float add that returns nothing; forwards to __ockl_atomic_add_noret_f32.
- // Marked deprecated (outside HIPRTC builds) in favour of atomicAdd.
- #if !defined(__HIPCC_RTC__)
- DEPRECATED("use atomicAdd instead")
- #endif  // !defined(__HIPCC_RTC__)
- __device__ inline void atomicAddNoRet(float* address, float val) {
-   __ockl_atomic_add_noret_f32(address, val);
- }
- // Agent-scope atomic add on a double.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicAdd;
- // otherwise the relaxed fetch-add builtin is used.
- __device__ inline double atomicAdd(double* address, double val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicAdd(address, val);
- #else
-   const double previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- #endif
- }
- // Relaxed, system-scope atomic add on a double; returns the fetch-add result.
- __device__ inline double atomicAdd_system(double* address, double val) {
-   const double previous =
-       __hip_atomic_fetch_add(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic subtract on an int, expressed as a fetch-add of `-val`.
- __device__ inline int atomicSub(int* address, int val) {
-   const int addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- // Relaxed, system-scope atomic subtract on an int, expressed as a fetch-add of `-val`.
- __device__ inline int atomicSub_system(int* address, int val) {
-   const int addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Relaxed, agent-scope atomic subtract on an unsigned int, expressed as a fetch-add of
- // the (modular) negation of `val`.
- __device__ inline unsigned int atomicSub(unsigned int* address, unsigned int val) {
-   const unsigned int addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- // Relaxed, system-scope atomic subtract on an unsigned int, expressed as a fetch-add of
- // the (modular) negation of `val`.
- __device__ inline unsigned int atomicSub_system(unsigned int* address, unsigned int val) {
-   const unsigned int addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Relaxed, agent-scope atomic subtract on an unsigned long, expressed as a fetch-add of
- // the (modular) negation of `val`.
- __device__ inline unsigned long atomicSub(unsigned long* address, unsigned long val) {
-   const unsigned long addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- // Relaxed, system-scope atomic subtract on an unsigned long, expressed as a fetch-add of
- // the (modular) negation of `val`.
- __device__ inline unsigned long atomicSub_system(unsigned long* address, unsigned long val) {
-   const unsigned long addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Relaxed, agent-scope atomic subtract on an unsigned long long, expressed as a fetch-add
- // of the (modular) negation of `val`.
- __device__ inline unsigned long long atomicSub(unsigned long long* address,
-                                                unsigned long long val) {
-   const unsigned long long addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- }
- // Relaxed, system-scope atomic subtract on an unsigned long long, expressed as a fetch-add
- // of the (modular) negation of `val`.
- __device__ inline unsigned long long atomicSub_system(unsigned long long* address,
-                                                       unsigned long long val) {
-   const unsigned long long addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Agent-scope atomic subtract on a float, performed as an add of `-val`.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the add goes through unsafeAtomicAdd;
- // otherwise the relaxed fetch-add builtin is used.
- __device__ inline float atomicSub(float* address, float val) {
-   const float addend = -val;
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicAdd(address, addend);
- #else
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif
- }
- // Relaxed, system-scope atomic subtract on a float, expressed as a fetch-add of `-val`.
- __device__ inline float atomicSub_system(float* address, float val) {
-   const float addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Agent-scope atomic subtract on a double, performed as an add of `-val`.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the add goes through unsafeAtomicAdd;
- // otherwise the relaxed fetch-add builtin is used.
- __device__ inline double atomicSub(double* address, double val) {
-   const double addend = -val;
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicAdd(address, addend);
- #else
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif
- }
- // Relaxed, system-scope atomic subtract on a double, expressed as a fetch-add of `-val`.
- __device__ inline double atomicSub_system(double* address, double val) {
-   const double addend = -val;
-   return __hip_atomic_fetch_add(address, addend, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- // Relaxed, agent-scope atomic exchange on an int; returns the exchange builtin's result.
- __device__ inline int atomicExch(int* address, int val) {
-   const int previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on an int; returns the exchange builtin's result.
- __device__ inline int atomicExch_system(int* address, int val) {
-   const int previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic exchange on an unsigned int; returns the builtin's result.
- __device__ inline unsigned int atomicExch(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on an unsigned int; returns the builtin's result.
- __device__ inline unsigned int atomicExch_system(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic exchange on an unsigned long; returns the builtin's result.
- __device__ inline unsigned long atomicExch(unsigned long* address, unsigned long val) {
-   const unsigned long previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on an unsigned long; returns the builtin's result.
- __device__ inline unsigned long atomicExch_system(unsigned long* address, unsigned long val) {
-   const unsigned long previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic exchange on an unsigned long long; returns the builtin's result.
- __device__ inline unsigned long long atomicExch(unsigned long long* address,
-                                                 unsigned long long val) {
-   const unsigned long long previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on an unsigned long long; returns the builtin's result.
- __device__ inline unsigned long long atomicExch_system(unsigned long long* address,
-                                                        unsigned long long val) {
-   const unsigned long long previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic exchange on a float; returns the exchange builtin's result.
- __device__ inline float atomicExch(float* address, float val) {
-   const float previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on a float; returns the exchange builtin's result.
- __device__ inline float atomicExch_system(float* address, float val) {
-   const float previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic exchange on a double; returns the exchange builtin's result.
- __device__ inline double atomicExch(double* address, double val) {
-   const double previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   return previous;
- }
- // Relaxed, system-scope atomic exchange on a double; returns the exchange builtin's result.
- __device__ inline double atomicExch_system(double* address, double val) {
-   const double previous =
-       __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   return previous;
- }
- // Relaxed, agent-scope atomic minimum on an int.
- __device__ inline int atomicMin(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](int candidate, int current) { return candidate < current; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic minimum on an int.
- __device__ inline int atomicMin_system(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](int candidate, int current) { return candidate < current; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic minimum on an unsigned int.
- __device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned int candidate, unsigned int current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic minimum on an unsigned int.
- __device__ inline unsigned int atomicMin_system(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned int candidate, unsigned int current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic minimum on an unsigned long.
- //   address : location to update
- //   val     : candidate value
- // Returns the result of the fetch-min operation as unsigned long, i.e. the same type as
- // the pointee, matching the atomicMin_system and atomicMax overloads for unsigned long
- // (the return type was previously spelled `unsigned long long`).
- __device__ inline unsigned long atomicMin(unsigned long* address, unsigned long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long candidate, unsigned long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic minimum on an unsigned long.
- __device__ inline unsigned long atomicMin_system(unsigned long* address, unsigned long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long candidate, unsigned long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic minimum on an unsigned long long.
- __device__ inline unsigned long long atomicMin(unsigned long long* address,
-                                                unsigned long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long long candidate, unsigned long long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic minimum on an unsigned long long.
- __device__ inline unsigned long long atomicMin_system(unsigned long long* address,
-                                                       unsigned long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long long candidate, unsigned long long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic minimum on a long long.
- __device__ inline long long atomicMin(long long* address, long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](long long candidate, long long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic minimum on a long long.
- __device__ inline long long atomicMin_system(long long* address, long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-min is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](long long candidate, long long current) {
-     return candidate < current;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_min(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Agent-scope atomic minimum on a float.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicMin;
- // otherwise a relaxed compare-exchange loop stores `val` while the stored value is larger.
- // Returns the last value observed at `addr` (the replaced value when a swap happened).
- __device__ inline float atomicMin(float* addr, float val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicMin(addr, val);
- #elif __has_builtin(__hip_atomic_load) && \
-     __has_builtin(__hip_atomic_compare_exchange_strong)
-   float observed = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   for (bool swapped = false; !swapped && observed > val;) {
-     swapped = __hip_atomic_compare_exchange_strong(addr, &observed, val,
-         __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   }
-   return observed;
- #else
-   // Fallback: run the same loop on the 32-bit integer image of the float.
-   unsigned int* const bits_ptr = (unsigned int*)addr;
-   unsigned int observed_bits = __atomic_load_n(bits_ptr, __ATOMIC_RELAXED);
-   for (bool swapped = false; !swapped && __uint_as_float(observed_bits) > val;) {
-     swapped = __atomic_compare_exchange_n(bits_ptr, &observed_bits, __float_as_uint(val),
-         false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   }
-   return __uint_as_float(observed_bits);
- #endif
- }
- // System-scope, relaxed atomic minimum on a float.
- //   address : location to update
- //   val     : candidate value; stored only while it compares less than the stored value
- // Returns the contents of `*address` that this call replaced, or the last value observed
- // when no store was needed.
- //
- // The loop issues the compare-exchange directly and stops as soon as it succeeds.
- // Looping only on `val < value` with the return value of atomicCAS_system is not enough:
- // after a successful swap the condition is still true, so a second (failing) CAS would
- // run and the function would return `val` instead of the previous contents.
- __device__ inline float atomicMin_system(float* address, float val) {
-   unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
- #if __has_builtin(__hip_atomic_load)
-   unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
- #else
-   unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
- #endif
-   float value = __uint_as_float(tmp);
-   bool done = false;
-   while (!done && val < value) {
-     // On failure the builtin refreshes `value` with the current contents; on success
-     // `value` keeps the contents that were just replaced.
-     done = __hip_atomic_compare_exchange_strong(address, &value, val, __ATOMIC_RELAXED,
-                                                 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   }
-   return value;
- }
- // Agent-scope atomic minimum on a double.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicMin;
- // otherwise a relaxed compare-exchange loop stores `val` while the stored value is larger.
- // Returns the last value observed at `addr` (the replaced value when a swap happened).
- __device__ inline double atomicMin(double* addr, double val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicMin(addr, val);
- #elif __has_builtin(__hip_atomic_load) && \
-     __has_builtin(__hip_atomic_compare_exchange_strong)
-   double observed = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   for (bool swapped = false; !swapped && observed > val;) {
-     swapped = __hip_atomic_compare_exchange_strong(addr, &observed, val,
-         __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   }
-   return observed;
- #else
-   // Fallback: run the same loop on the 64-bit integer image of the double.
-   unsigned long long* const bits_ptr = (unsigned long long*)addr;
-   unsigned long long observed_bits = __atomic_load_n(bits_ptr, __ATOMIC_RELAXED);
-   for (bool swapped = false; !swapped && __longlong_as_double(observed_bits) > val;) {
-     swapped = __atomic_compare_exchange_n(bits_ptr, &observed_bits, __double_as_longlong(val),
-         false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   }
-   return __longlong_as_double(observed_bits);
- #endif
- }
- // System-scope, relaxed atomic minimum on a double.
- //   address : location to update
- //   val     : candidate value; stored only while it compares less than the stored value
- // Returns the contents of `*address` that this call replaced, or the last value observed
- // when no store was needed.
- //
- // The loop issues the compare-exchange directly and stops as soon as it succeeds.
- // Looping only on `val < value` with the return value of atomicCAS_system is not enough:
- // after a successful swap the condition is still true, so a second (failing) CAS would
- // run and the function would return `val` instead of the previous contents.
- __device__ inline double atomicMin_system(double* address, double val) {
-   unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
- #if __has_builtin(__hip_atomic_load)
-   unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
- #else
-   unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
- #endif
-   double value = __longlong_as_double(tmp);
-   bool done = false;
-   while (!done && val < value) {
-     // On failure the builtin refreshes `value` with the current contents; on success
-     // `value` keeps the contents that were just replaced.
-     done = __hip_atomic_compare_exchange_strong(address, &value, val, __ATOMIC_RELAXED,
-                                                 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   }
-   return value;
- }
- // Relaxed, agent-scope atomic maximum on an int.
- __device__ inline int atomicMax(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](int candidate, int current) { return current < candidate; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic maximum on an int.
- __device__ inline int atomicMax_system(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](int candidate, int current) { return current < candidate; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic maximum on an unsigned int.
- __device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned int candidate, unsigned int current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic maximum on an unsigned int.
- __device__ inline unsigned int atomicMax_system(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned int candidate, unsigned int current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic maximum on an unsigned long.
- __device__ inline unsigned long atomicMax(unsigned long* address, unsigned long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long candidate, unsigned long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic maximum on an unsigned long.
- __device__ inline unsigned long atomicMax_system(unsigned long* address, unsigned long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long candidate, unsigned long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic maximum on an unsigned long long.
- __device__ inline unsigned long long atomicMax(unsigned long long* address,
-                                                unsigned long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long long candidate, unsigned long long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic maximum on an unsigned long long.
- __device__ inline unsigned long long atomicMax_system(unsigned long long* address,
-                                                       unsigned long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](unsigned long long candidate, unsigned long long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic maximum on a long long.
- __device__ inline long long atomicMax(long long* address, long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](long long candidate, long long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic maximum on a long long.
- __device__ inline long long atomicMax_system(long long* address, long long val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS-based extrema expander; the native fetch-max is
-   // passed as the callable the expander runs for shared-memory pointers.
-   auto candidate_wins = [](long long candidate, long long current) {
-     return current < candidate;
-   };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_extrema_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, candidate_wins, native_op);
- #else
-   return __hip_atomic_fetch_max(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Agent-scope atomic maximum on a float.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicMax;
- // otherwise a relaxed compare-exchange loop stores `val` while the stored value is smaller.
- // Returns the last value observed at `addr` (the replaced value when a swap happened).
- __device__ inline float atomicMax(float* addr, float val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicMax(addr, val);
- #elif __has_builtin(__hip_atomic_load) && \
-     __has_builtin(__hip_atomic_compare_exchange_strong)
-   float observed = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   for (bool swapped = false; !swapped && observed < val;) {
-     swapped = __hip_atomic_compare_exchange_strong(addr, &observed, val,
-         __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   }
-   return observed;
- #else
-   // Fallback: run the same loop on the 32-bit integer image of the float.
-   unsigned int* const bits_ptr = (unsigned int*)addr;
-   unsigned int observed_bits = __atomic_load_n(bits_ptr, __ATOMIC_RELAXED);
-   for (bool swapped = false; !swapped && __uint_as_float(observed_bits) < val;) {
-     swapped = __atomic_compare_exchange_n(bits_ptr, &observed_bits, __float_as_uint(val),
-         false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   }
-   return __uint_as_float(observed_bits);
- #endif
- }
- // System-scope, relaxed atomic maximum on a float.
- //   address : location to update
- //   val     : candidate value; stored only while the stored value compares less than it
- // Returns the contents of `*address` that this call replaced, or the last value observed
- // when no store was needed.
- //
- // The loop issues the compare-exchange directly and stops as soon as it succeeds.
- // Looping only on `value < val` with the return value of atomicCAS_system is not enough:
- // after a successful swap the condition is still true, so a second (failing) CAS would
- // run and the function would return `val` instead of the previous contents.
- __device__ inline float atomicMax_system(float* address, float val) {
-   unsigned int* uaddr { reinterpret_cast<unsigned int*>(address) };
- #if __has_builtin(__hip_atomic_load)
-   unsigned int tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
- #else
-   unsigned int tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
- #endif
-   float value = __uint_as_float(tmp);
-   bool done = false;
-   while (!done && value < val) {
-     // On failure the builtin refreshes `value` with the current contents; on success
-     // `value` keeps the contents that were just replaced.
-     done = __hip_atomic_compare_exchange_strong(address, &value, val, __ATOMIC_RELAXED,
-                                                 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   }
-   return value;
- }
- // Agent-scope atomic maximum on a double.
- // With __AMDGCN_UNSAFE_FP_ATOMICS__ defined the call is forwarded to unsafeAtomicMax;
- // otherwise a relaxed compare-exchange loop stores `val` while the stored value is smaller.
- // Returns the last value observed at `addr` (the replaced value when a swap happened).
- __device__ inline double atomicMax(double* addr, double val) {
- #if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   return unsafeAtomicMax(addr, val);
- #elif __has_builtin(__hip_atomic_load) && \
-     __has_builtin(__hip_atomic_compare_exchange_strong)
-   double observed = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   for (bool swapped = false; !swapped && observed < val;) {
-     swapped = __hip_atomic_compare_exchange_strong(addr, &observed, val,
-         __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   }
-   return observed;
- #else
-   // Fallback: run the same loop on the 64-bit integer image of the double.
-   unsigned long long* const bits_ptr = (unsigned long long*)addr;
-   unsigned long long observed_bits = __atomic_load_n(bits_ptr, __ATOMIC_RELAXED);
-   for (bool swapped = false; !swapped && __longlong_as_double(observed_bits) < val;) {
-     swapped = __atomic_compare_exchange_n(bits_ptr, &observed_bits, __double_as_longlong(val),
-         false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   }
-   return __longlong_as_double(observed_bits);
- #endif
- }
- // System-scope, relaxed atomic maximum on a double.
- //   address : location to update
- //   val     : candidate value; stored only while the stored value compares less than it
- // Returns the contents of `*address` that this call replaced, or the last value observed
- // when no store was needed.
- //
- // The loop issues the compare-exchange directly and stops as soon as it succeeds.
- // Looping only on `value < val` with the return value of atomicCAS_system is not enough:
- // after a successful swap the condition is still true, so a second (failing) CAS would
- // run and the function would return `val` instead of the previous contents.
- __device__ inline double atomicMax_system(double* address, double val) {
-   unsigned long long* uaddr { reinterpret_cast<unsigned long long*>(address) };
- #if __has_builtin(__hip_atomic_load)
-   unsigned long long tmp {__hip_atomic_load(uaddr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM)};
- #else
-   unsigned long long tmp {__atomic_load_n(uaddr, __ATOMIC_RELAXED)};
- #endif
-   double value = __longlong_as_double(tmp);
-   bool done = false;
-   while (!done && value < val) {
-     // On failure the builtin refreshes `value` with the current contents; on success
-     // `value` keeps the contents that were just replaced.
-     done = __hip_atomic_compare_exchange_strong(address, &value, val, __ATOMIC_RELAXED,
-                                                 __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   }
-   return value;
- }
- // Agent-scope, relaxed wrapping increment on an unsigned int.
- // The CAS emulation used on gfx941 stores 0 when the current value is >= `val`,
- // and current + 1 otherwise.
- __device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // Local declaration bound by name to the LLVM atomic-inc intrinsic.
-   __device__
-   extern
-   unsigned int __builtin_amdgcn_atomic_inc(
-       unsigned int*,
-       unsigned int,
-       unsigned int,
-       unsigned int,
-       bool) __asm("llvm.amdgcn.atomic.inc.i32.p0i32");
-   auto wrap_up = [](unsigned int& current, unsigned int limit) {
-     if (current >= limit) {
-       current = 0;
-     } else {
-       current = current + 1;
-     }
-   };
-   auto native_op = [=]() {
-     return __builtin_amdgcn_atomic_inc(address, val, __ATOMIC_RELAXED, 1, false);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, wrap_up, native_op);
- #else
-   return __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
- #endif  // __gfx941__
- }
- // Agent-scope, relaxed wrapping decrement on an unsigned int.
- // The CAS emulation used on gfx941 stores `val` when the current value is 0 or greater
- // than `val`, and current - 1 otherwise.
- __device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // Local declaration bound by name to the LLVM atomic-dec intrinsic.
-   __device__
-   extern
-   unsigned int __builtin_amdgcn_atomic_dec(
-       unsigned int*,
-       unsigned int,
-       unsigned int,
-       unsigned int,
-       bool) __asm("llvm.amdgcn.atomic.dec.i32.p0i32");
-   auto wrap_down = [](unsigned int& current, unsigned int limit) {
-     if (current == 0 || current > limit) {
-       current = limit;
-     } else {
-       current = current - 1;
-     }
-   };
-   auto native_op = [=]() {
-     return __builtin_amdgcn_atomic_dec(address, val, __ATOMIC_RELAXED, 1, false);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, wrap_down, native_op);
- #else
-   return __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic bitwise AND on an int.
- __device__ inline int atomicAnd(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS expander; the native fetch-and is passed as the
-   // callable the expander runs for shared-memory pointers.
-   auto apply_and = [](int& current, int mask) { current &= mask; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, apply_and, native_op);
- #else
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic bitwise AND on an int.
- __device__ inline int atomicAnd_system(int* address, int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS expander; the native fetch-and is passed as the
-   // callable the expander runs for shared-memory pointers.
-   auto apply_and = [](int& current, int mask) { current &= mask; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, apply_and, native_op);
- #else
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Relaxed, agent-scope atomic bitwise AND on an unsigned int.
- __device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS expander; the native fetch-and is passed as the
-   // callable the expander runs for shared-memory pointers.
-   auto apply_and = [](unsigned int& current, unsigned int mask) { current &= mask; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(
-       address, val, apply_and, native_op);
- #else
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #endif  // __gfx941__
- }
- // Relaxed, system-scope atomic bitwise AND on an unsigned int.
- __device__ inline unsigned int atomicAnd_system(unsigned int* address, unsigned int val) {
- #if defined(__gfx941__)
-   // gfx941: route through the CAS expander; the native fetch-and is passed as the
-   // callable the expander runs for shared-memory pointers.
-   auto apply_and = [](unsigned int& current, unsigned int mask) { current &= mask; };
-   auto native_op = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(
-       address, val, apply_and, native_op);
- #else
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #endif  // __gfx941__
- }
- // Atomic bitwise AND of `val` into an unsigned long: relaxed ordering, agent scope.
- // Returns whatever the fetch-and primitive yields for the location.
- __device__ inline unsigned long atomicAnd(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current &= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise AND of `val` into an unsigned long: relaxed ordering, system scope.
- // Returns whatever the fetch-and primitive yields for the location.
- __device__ inline unsigned long atomicAnd_system(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current &= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise AND of `val` into an unsigned long long: relaxed ordering, agent scope.
- // Returns whatever the fetch-and primitive yields for the location.
- __device__ inline unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long long& current, unsigned long long operand) {
-     current &= operand;
-   };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise AND of `val` into an unsigned long long: relaxed ordering, system scope.
- // Returns whatever the fetch-and primitive yields for the location.
- __device__ inline unsigned long long atomicAnd_system(unsigned long long* address, unsigned long long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long long& current, unsigned long long operand) {
-     current &= operand;
-   };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_and(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into `*address`: relaxed ordering, agent scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline int atomicOr(int* address, int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](int& current, int operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into `*address`: relaxed ordering, system scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline int atomicOr_system(int* address, int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](int& current, int operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned int: relaxed ordering, agent scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned int& current, unsigned int operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned int: relaxed ordering, system scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned int atomicOr_system(unsigned int* address, unsigned int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned int& current, unsigned int operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned long: relaxed ordering, agent scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned long atomicOr(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned long: relaxed ordering, system scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned long atomicOr_system(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current |= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned long long: relaxed ordering, agent scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long long& current, unsigned long long operand) {
-     current |= operand;
-   };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise OR of `val` into an unsigned long long: relaxed ordering, system scope.
- // Returns whatever the fetch-or primitive yields for the location.
- __device__ inline unsigned long long atomicOr_system(unsigned long long* address, unsigned long long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long long& current, unsigned long long operand) {
-     current |= operand;
-   };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_or(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into `*address`: relaxed ordering, agent scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline int atomicXor(int* address, int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](int& current, int operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into `*address`: relaxed ordering, system scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline int atomicXor_system(int* address, int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](int& current, int operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into an unsigned int: relaxed ordering, agent scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned int& current, unsigned int operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into an unsigned int: relaxed ordering, system scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline unsigned int atomicXor_system(unsigned int* address, unsigned int val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned int& current, unsigned int operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into an unsigned long: relaxed ordering, agent scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline unsigned long atomicXor(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into an unsigned long: relaxed ordering, system scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline unsigned long atomicXor_system(unsigned long* address, unsigned long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long& current, unsigned long operand) { current ^= operand; };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM>(address, val, update, native);
- #endif  // !__gfx941__
- }
- // Atomic bitwise XOR of `val` into an unsigned long long: relaxed ordering, agent scope.
- // Returns whatever the fetch-xor primitive yields for the location.
- __device__ inline unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
- #if !defined(__gfx941__)
-   return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
- #else
-   // gfx941: hip_cas_expander is given the update rule and the native call.
-   const auto update = [](unsigned long long& current, unsigned long long operand) {
-     current ^= operand;
-   };
-   const auto native = [=]() {
-     return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
-   };
-   return hip_cas_expander<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(address, val, update, native);
- #endif  // !__gfx941__
- }
- __device__
- inline
- unsigned long long atomicXor_system(unsigned long long* address, unsigned long long val) {
- return __hip_atomic_fetch_xor(address, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
- }
- #else // __hip_atomic_compare_exchange_strong
- // Strong, relaxed compare-and-swap on an int. Hands back the value seen at
- // `*address`: equal to `compare` when the swap happened, otherwise the
- // contents the builtin wrote back into the expected slot.
- __device__ inline int atomicCAS(int* address, int compare, int val) {
-   int observed = compare;
-   __atomic_compare_exchange_n(address, &observed, val, /*weak=*/false,
-                               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   return observed;
- }
- // Strong, relaxed compare-and-swap on an unsigned int. Hands back the value
- // seen at `*address`: equal to `compare` when the swap happened, otherwise
- // the contents the builtin wrote back into the expected slot.
- __device__ inline unsigned int atomicCAS(unsigned int* address, unsigned int compare, unsigned int val) {
-   unsigned int observed = compare;
-   __atomic_compare_exchange_n(address, &observed, val, /*weak=*/false,
-                               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   return observed;
- }
- // Strong, relaxed compare-and-swap on an unsigned long long. Hands back the
- // value seen at `*address`: equal to `compare` when the swap happened,
- // otherwise the contents the builtin wrote back into the expected slot.
- __device__ inline unsigned long long atomicCAS(unsigned long long* address,
-                                                unsigned long long compare,
-                                                unsigned long long val) {
-   unsigned long long observed = compare;
-   __atomic_compare_exchange_n(address, &observed, val, /*weak=*/false,
-                               __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-   return observed;
- }
- // Relaxed atomic add of `val` to the int at `address`; returns the result of
- // __atomic_fetch_add (the contents before the addition).
- __device__ inline int atomicAdd(int* address, int val) {
-   const int previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic add of `val` to the unsigned int at `address`; returns the
- // result of __atomic_fetch_add (the contents before the addition).
- __device__ inline unsigned int atomicAdd(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic add of `val` to the unsigned long long at `address`; returns
- // the result of __atomic_fetch_add (the contents before the addition).
- __device__ inline unsigned long long atomicAdd(unsigned long long* address, unsigned long long val) {
-   const unsigned long long previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Atomic add of `val` to the float at `address`. Uses the relaxed
- // __atomic_fetch_add builtin unless the build defines
- // __AMDGCN_UNSAFE_FP_ATOMICS__, in which case unsafeAtomicAdd is used.
- __device__ inline float atomicAdd(float* address, float val) {
- #if !defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   const float previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-   return previous;
- #else
-   return unsafeAtomicAdd(address, val);
- #endif  // !__AMDGCN_UNSAFE_FP_ATOMICS__
- }
- // Adds `val` to the float at `address` by forwarding to
- // __ockl_atomic_add_noret_f32; nothing is returned to the caller.
- // Outside __HIPCC_RTC__ builds the function carries a DEPRECATED marker
- // directing users to atomicAdd.
- #if !defined(__HIPCC_RTC__)
- DEPRECATED("use atomicAdd instead")
- #endif // !defined(__HIPCC_RTC__)
- __device__
- inline
- void atomicAddNoRet(float* address, float val)
- {
- __ockl_atomic_add_noret_f32(address, val);
- }
- // Atomic add of `val` to the double at `address`. Uses the relaxed
- // __atomic_fetch_add builtin unless the build defines
- // __AMDGCN_UNSAFE_FP_ATOMICS__, in which case unsafeAtomicAdd is used.
- __device__ inline double atomicAdd(double* address, double val) {
- #if !defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
-   const double previous = __atomic_fetch_add(address, val, __ATOMIC_RELAXED);
-   return previous;
- #else
-   return unsafeAtomicAdd(address, val);
- #endif  // !__AMDGCN_UNSAFE_FP_ATOMICS__
- }
- // Relaxed atomic subtraction of `val` from the int at `address`; returns the
- // result of __atomic_fetch_sub (the contents before the subtraction).
- __device__ inline int atomicSub(int* address, int val) {
-   const int previous = __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic subtraction of `val` from the unsigned int at `address`;
- // returns the result of __atomic_fetch_sub (the contents before the subtraction).
- __device__ inline unsigned int atomicSub(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_sub(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic exchange: stores `val` at `address` and returns what
- // __atomic_exchange_n reports as the replaced int.
- __device__ inline int atomicExch(int* address, int val) {
-   const int replaced = __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-   return replaced;
- }
- // Relaxed atomic exchange: stores `val` at `address` and returns what
- // __atomic_exchange_n reports as the replaced unsigned int.
- __device__ inline unsigned int atomicExch(unsigned int* address, unsigned int val) {
-   const unsigned int replaced = __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-   return replaced;
- }
- // Relaxed atomic exchange: stores `val` at `address` and returns what
- // __atomic_exchange_n reports as the replaced unsigned long long.
- __device__ inline unsigned long long atomicExch(unsigned long long* address, unsigned long long val) {
-   const unsigned long long replaced = __atomic_exchange_n(address, val, __ATOMIC_RELAXED);
-   return replaced;
- }
- // Relaxed atomic exchange for float. The exchange itself is performed on the
- // value's unsigned-int representation (via __float_as_uint), and the
- // replaced word is converted back with __uint_as_float.
- __device__ inline float atomicExch(float* address, float val) {
-   unsigned int* const word = reinterpret_cast<unsigned int*>(address);
-   const unsigned int incoming = __float_as_uint(val);
-   const unsigned int replaced = __atomic_exchange_n(word, incoming, __ATOMIC_RELAXED);
-   return __uint_as_float(replaced);
- }
- // Relaxed atomic minimum on an int; returns the result of __atomic_fetch_min
- // (the contents before the update).
- __device__ inline int atomicMin(int* address, int val) {
-   const int previous = __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic minimum on an unsigned int; returns the result of
- // __atomic_fetch_min (the contents before the update).
- __device__ inline unsigned int atomicMin(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_min(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Atomically lowers `*address` to `val` when `val` is smaller (relaxed
- // ordering). Returns the contents of `*address` from just before this call's
- // update, or the current contents when no update was needed.
- __device__
- inline
- unsigned long long atomicMin(
-     unsigned long long* address, unsigned long long val)
- {
-   unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
-   // A failed compare-exchange refreshes `observed` with the current contents,
-   // so the comparison is repeated against fresh data. Stopping on success
-   // matters: `observed` then still holds the replaced value, which is the
-   // result callers expect.
-   while (val < observed &&
-          !__atomic_compare_exchange_n(address, &observed, val, false,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
-   }
-   return observed;
- }
- // Atomically lowers `*address` to `val` when `val` is smaller (relaxed
- // ordering, signed comparison). Returns the contents of `*address` from just
- // before this call's update, or the current contents when no update was needed.
- __device__ inline long long atomicMin(long long* address, long long val) {
-   long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
-   // The compare-exchange builtin is used directly: this branch of the header
-   // declares no atomicCAS overload taking long long*. On failure the builtin
-   // refreshes `observed`; on success `observed` still holds the replaced
-   // value, so the loop must stop there.
-   while (val < observed &&
-          !__atomic_compare_exchange_n(address, &observed, val, false,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
-   }
-   return observed;
- }
- // Relaxed atomic maximum on an int; returns the result of __atomic_fetch_max
- // (the contents before the update).
- __device__ inline int atomicMax(int* address, int val) {
-   const int previous = __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic maximum on an unsigned int; returns the result of
- // __atomic_fetch_max (the contents before the update).
- __device__ inline unsigned int atomicMax(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_max(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Atomically raises `*address` to `val` when `val` is larger (relaxed
- // ordering). Returns the contents of `*address` from just before this call's
- // update, or the current contents when no update was needed.
- __device__
- inline
- unsigned long long atomicMax(
-     unsigned long long* address, unsigned long long val)
- {
-   unsigned long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
-   // A failed compare-exchange refreshes `observed` with the current contents,
-   // so the comparison is repeated against fresh data. Stopping on success
-   // matters: `observed` then still holds the replaced value, which is the
-   // result callers expect.
-   while (observed < val &&
-          !__atomic_compare_exchange_n(address, &observed, val, false,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
-   }
-   return observed;
- }
- // Atomically raises `*address` to `val` when `val` is larger (relaxed
- // ordering, signed comparison). Returns the contents of `*address` from just
- // before this call's update, or the current contents when no update was needed.
- __device__ inline long long atomicMax(long long* address, long long val) {
-   long long observed = __atomic_load_n(address, __ATOMIC_RELAXED);
-   // The compare-exchange builtin is used directly: this branch of the header
-   // declares no atomicCAS overload taking long long*. On failure the builtin
-   // refreshes `observed`; on success `observed` still holds the replaced
-   // value, so the loop must stop there.
-   while (observed < val &&
-          !__atomic_compare_exchange_n(address, &observed, val, false,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
-   }
-   return observed;
- }
- // Forwards to __builtin_amdgcn_atomic_inc32 with relaxed ordering and the
- // "agent" scope string, returning the builtin's result unchanged.
- __device__ inline unsigned int atomicInc(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __builtin_amdgcn_atomic_inc32(address, val, __ATOMIC_RELAXED, "agent");
-   return previous;
- }
- // Forwards to __builtin_amdgcn_atomic_dec32 with relaxed ordering and the
- // "agent" scope string, returning the builtin's result unchanged.
- __device__ inline unsigned int atomicDec(unsigned int* address, unsigned int val) {
-   const unsigned int previous =
-       __builtin_amdgcn_atomic_dec32(address, val, __ATOMIC_RELAXED, "agent");
-   return previous;
- }
- // Relaxed atomic bitwise AND on an int; returns the result of
- // __atomic_fetch_and (the contents before the update).
- __device__ inline int atomicAnd(int* address, int val) {
-   const int previous = __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise AND on an unsigned int; returns the result of
- // __atomic_fetch_and (the contents before the update).
- __device__ inline unsigned int atomicAnd(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise AND on an unsigned long long; returns the result of
- // __atomic_fetch_and (the contents before the update).
- __device__ inline unsigned long long atomicAnd(unsigned long long* address, unsigned long long val) {
-   const unsigned long long previous = __atomic_fetch_and(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise OR on an int; returns the result of
- // __atomic_fetch_or (the contents before the update).
- __device__ inline int atomicOr(int* address, int val) {
-   const int previous = __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise OR on an unsigned int; returns the result of
- // __atomic_fetch_or (the contents before the update).
- __device__ inline unsigned int atomicOr(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise OR on an unsigned long long; returns the result of
- // __atomic_fetch_or (the contents before the update).
- __device__ inline unsigned long long atomicOr(unsigned long long* address, unsigned long long val) {
-   const unsigned long long previous = __atomic_fetch_or(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise XOR on an int; returns the result of
- // __atomic_fetch_xor (the contents before the update).
- __device__ inline int atomicXor(int* address, int val) {
-   const int previous = __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise XOR on an unsigned int; returns the result of
- // __atomic_fetch_xor (the contents before the update).
- __device__ inline unsigned int atomicXor(unsigned int* address, unsigned int val) {
-   const unsigned int previous = __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- // Relaxed atomic bitwise XOR on an unsigned long long; returns the result of
- // __atomic_fetch_xor (the contents before the update).
- __device__ inline unsigned long long atomicXor(unsigned long long* address, unsigned long long val) {
-   const unsigned long long previous = __atomic_fetch_xor(address, val, __ATOMIC_RELAXED);
-   return previous;
- }
- #endif // __hip_atomic_compare_exchange_strong
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if !defined(__HIPCC_RTC__)
- #include "host_defines.h"
- #include "amd_hip_vector_types.h" // For Native_vec_
- #endif
- #if defined(__cplusplus)
- extern "C" {
- #endif
- // DOT FUNCTIONS
- #if defined(__clang__) && defined(__HIP__)
- __device__
- __attribute__((const))
- int __ockl_sdot2(
- HIP_vector_base<short, 2>::Native_vec_,
- HIP_vector_base<short, 2>::Native_vec_,
- int, bool);
- __device__
- __attribute__((const))
- unsigned int __ockl_udot2(
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- HIP_vector_base<unsigned short, 2>::Native_vec_,
- unsigned int, bool);
- __device__
- __attribute__((const))
- int __ockl_sdot4(
- HIP_vector_base<char, 4>::Native_vec_,
- HIP_vector_base<char, 4>::Native_vec_,
- int, bool);
- __device__
- __attribute__((const))
- unsigned int __ockl_udot4(
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- HIP_vector_base<unsigned char, 4>::Native_vec_,
- unsigned int, bool);
- __device__
- __attribute__((const))
- int __ockl_sdot8(int, int, int, bool);
- __device__
- __attribute__((const))
- unsigned int __ockl_udot8(unsigned int, unsigned int, unsigned int, bool);
- #endif
- #if !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- // BEGIN FLOAT
- __device__
- __attribute__((const))
- float __ocml_acos_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_acosh_f32(float);
- __device__
- __attribute__((const))
- float __ocml_asin_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_asinh_f32(float);
- __device__
- __attribute__((const))
- float __ocml_atan2_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_atan_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_atanh_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_cbrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_ceil_f32(float);
- // Device-side prototype for the OCML single-precision copysign routine.
- // Declared const: the result depends only on the two arguments.
- __device__
- __attribute__((const))
- float __ocml_copysign_f32(float, float);
- __device__
- float __ocml_cos_f32(float);
- __device__
- float __ocml_native_cos_f32(float);
- // Device-side prototype for the OCML single-precision cosh routine.
- // Declared pure: no side effects beyond producing its return value.
- __device__
- __attribute__((pure))
- float __ocml_cosh_f32(float);
- __device__
- float __ocml_cospi_f32(float);
- __device__
- float __ocml_i0_f32(float);
- __device__
- float __ocml_i1_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfc_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfcinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfcx_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erf_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_erfinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_exp10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp2_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_exp_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_exp_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_expm1_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fabs_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fdim_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_floor_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fma_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fmax_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_fmin_f32(float, float);
- // Device-side prototype for the OCML single-precision fmod routine.
- // Declared const: the result depends only on the two arguments.
- __device__
- __attribute__((const))
- float __ocml_fmod_f32(float, float);
- __device__
- float __ocml_frexp_f32(float, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- float __ocml_hypot_f32(float, float);
- __device__
- __attribute__((const))
- int __ocml_ilogb_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isfinite_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isinf_f32(float);
- __device__
- __attribute__((const))
- int __ocml_isnan_f32(float);
- __device__
- float __ocml_j0_f32(float);
- __device__
- float __ocml_j1_f32(float);
- __device__
- __attribute__((const))
- float __ocml_ldexp_f32(float, int);
- __device__
- float __ocml_lgamma_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log10_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log1p_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log2_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log2_f32(float);
- __device__
- __attribute__((const))
- float __ocml_logb_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_log_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_native_log_f32(float);
- __device__
- float __ocml_modf_f32(float, __attribute__((address_space(5))) float*);
- __device__
- __attribute__((const))
- float __ocml_nearbyint_f32(float);
- __device__
- __attribute__((const))
- float __ocml_nextafter_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_len3_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_len4_f32(float, float, float, float);
- __device__
- __attribute__((pure))
- float __ocml_ncdf_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_ncdfinv_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_pow_f32(float, float);
- __device__
- __attribute__((pure))
- float __ocml_pown_f32(float, int);
- __device__
- __attribute__((pure))
- float __ocml_rcbrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_remainder_f32(float, float);
- __device__
- float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- float __ocml_rhypot_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_rint_f32(float);
- __device__
- __attribute__((const))
- float __ocml_rlen3_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_rlen4_f32(float, float, float, float);
- __device__
- __attribute__((const))
- float __ocml_round_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_rsqrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_scalb_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_scalbn_f32(float, int);
- __device__
- __attribute__((const))
- int __ocml_signbit_f32(float);
- __device__
- float __ocml_sincos_f32(float, __attribute__((address_space(5))) float*);
- __device__
- float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float*);
- __device__
- float __ocml_sin_f32(float);
- __device__
- float __ocml_native_sin_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_sinh_f32(float);
- __device__
- float __ocml_sinpi_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_f32(float);
- __device__
- __attribute__((const))
- float __ocml_native_sqrt_f32(float);
- __device__
- float __ocml_tan_f32(float);
- __device__
- __attribute__((pure))
- float __ocml_tanh_f32(float);
- __device__
- float __ocml_tgamma_f32(float);
- __device__
- __attribute__((const))
- float __ocml_trunc_f32(float);
- __device__
- float __ocml_y0_f32(float);
- __device__
- float __ocml_y1_f32(float);
- // BEGIN INTRINSICS
- __device__
- __attribute__((const))
- float __ocml_add_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_add_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sub_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_mul_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rte_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtn_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtp_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_div_rtz_f32(float, float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rte_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtn_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtp_f32(float);
- __device__
- __attribute__((const))
- float __ocml_sqrt_rtz_f32(float);
- __device__
- __attribute__((const))
- float __ocml_fma_rte_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtn_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtp_f32(float, float, float);
- __device__
- __attribute__((const))
- float __ocml_fma_rtz_f32(float, float, float);
- // END INTRINSICS
- // END FLOAT
- // BEGIN DOUBLE
- __device__
- __attribute__((const))
- double __ocml_acos_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_acosh_f64(double);
- __device__
- __attribute__((const))
- double __ocml_asin_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_asinh_f64(double);
- __device__
- __attribute__((const))
- double __ocml_atan2_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_atan_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_atanh_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_cbrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_ceil_f64(double);
- __device__
- __attribute__((const))
- double __ocml_copysign_f64(double, double);
- __device__
- double __ocml_cos_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_cosh_f64(double);
- __device__
- double __ocml_cospi_f64(double);
- __device__
- double __ocml_i0_f64(double);
- __device__
- double __ocml_i1_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfc_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfcinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfcx_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erf_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_erfinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp10_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp2_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_exp_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_expm1_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fabs_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fdim_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_floor_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fma_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fmax_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_fmin_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_fmod_f64(double, double);
- __device__
- double __ocml_frexp_f64(double, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- double __ocml_hypot_f64(double, double);
- __device__
- __attribute__((const))
- int __ocml_ilogb_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isfinite_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isinf_f64(double);
- __device__
- __attribute__((const))
- int __ocml_isnan_f64(double);
- __device__
- double __ocml_j0_f64(double);
- __device__
- double __ocml_j1_f64(double);
- __device__
- __attribute__((const))
- double __ocml_ldexp_f64(double, int);
- __device__
- double __ocml_lgamma_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log10_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log1p_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log2_f64(double);
- __device__
- __attribute__((const))
- double __ocml_logb_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_log_f64(double);
- __device__
- double __ocml_modf_f64(double, __attribute__((address_space(5))) double*);
- __device__
- __attribute__((const))
- double __ocml_nearbyint_f64(double);
- __device__
- __attribute__((const))
- double __ocml_nextafter_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_len3_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_len4_f64(double, double, double, double);
- __device__
- __attribute__((pure))
- double __ocml_ncdf_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_ncdfinv_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_pow_f64(double, double);
- __device__
- __attribute__((pure))
- double __ocml_pown_f64(double, int);
- __device__
- __attribute__((pure))
- double __ocml_rcbrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_remainder_f64(double, double);
- __device__
- double __ocml_remquo_f64(
- double, double, __attribute__((address_space(5))) int*);
- __device__
- __attribute__((const))
- double __ocml_rhypot_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_rint_f64(double);
- __device__
- __attribute__((const))
- double __ocml_rlen3_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_rlen4_f64(double, double, double, double);
- __device__
- __attribute__((const))
- double __ocml_round_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_rsqrt_f64(double);
- __device__
- __attribute__((const))
- double __ocml_scalb_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_scalbn_f64(double, int);
- __device__
- __attribute__((const))
- int __ocml_signbit_f64(double);
- __device__
- double __ocml_sincos_f64(double, __attribute__((address_space(5))) double*);
- __device__
- double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double*);
- __device__
- double __ocml_sin_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_sinh_f64(double);
- __device__
- double __ocml_sinpi_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_f64(double);
- __device__
- double __ocml_tan_f64(double);
- __device__
- __attribute__((pure))
- double __ocml_tanh_f64(double);
- __device__
- double __ocml_tgamma_f64(double);
- __device__
- __attribute__((const))
- double __ocml_trunc_f64(double);
- __device__
- double __ocml_y0_f64(double);
- __device__
- double __ocml_y1_f64(double);
- // BEGIN INTRINSICS
- __device__
- __attribute__((const))
- double __ocml_add_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_add_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sub_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_mul_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rte_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtn_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtp_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_div_rtz_f64(double, double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rte_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtn_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtp_f64(double);
- __device__
- __attribute__((const))
- double __ocml_sqrt_rtz_f64(double);
- __device__
- __attribute__((const))
- double __ocml_fma_rte_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtn_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtp_f64(double, double, double);
- __device__
- __attribute__((const))
- double __ocml_fma_rtz_f64(double, double, double);
- // END INTRINSICS
- // END DOUBLE
- #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- #if defined(__cplusplus)
- } // extern "C"
- #endif
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- // /*
- // Half Math Functions
- // */
- #if !defined(__HIPCC_RTC__)
- #include "host_defines.h"
- #endif
- #ifndef __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- extern "C" {
- // Scalar _Float16 prototypes. Only declarations appear here; no bodies are
- // provided in this header.
- __attribute__((const)) __device__ _Float16 __ocml_ceil_f16(_Float16);
- __device__ _Float16 __ocml_cos_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_exp_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_exp10_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_exp2_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_floor_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16);
- __attribute__((const)) __device__ _Float16 __ocml_fabs_f16(_Float16);
- __attribute__((const)) __device__ int __ocml_isinf_f16(_Float16);
- __attribute__((const)) __device__ int __ocml_isnan_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_log_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_log10_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_log2_f16(_Float16);
- __attribute__((pure)) __device__ _Float16 __ocml_pown_f16(_Float16, int);
- __attribute__((const)) __device__ _Float16 __ocml_rint_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_rsqrt_f16(_Float16);
- __device__ _Float16 __ocml_sin_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_sqrt_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_trunc_f16(_Float16);
- __attribute__((const)) __device__ _Float16 __ocml_fmax_f16(_Float16, _Float16);
- __attribute__((const)) __device__ _Float16 __ocml_fmin_f16(_Float16, _Float16);
- // Two-lane vector types used by the packed prototypes below.
- typedef _Float16 __2f16 __attribute__((ext_vector_type(2)));
- typedef short __2i16 __attribute__((ext_vector_type(2)));
- #if defined(__clang__) && defined(__HIP__)
- __attribute__((const)) __device__ float __ockl_fdot2(__2f16 a, __2f16 b, float c, bool s);
- #endif
- // Packed (two-lane) prototypes.
- __attribute__((const)) __device__ __2f16 __ocml_ceil_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_fabs_2f16(__2f16);
- __device__ __2f16 __ocml_cos_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_exp_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_exp10_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_exp2_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_floor_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_fma_2f16(__2f16, __2f16, __2f16);
- __attribute__((const)) __device__ __2i16 __ocml_isinf_2f16(__2f16);
- __attribute__((const)) __device__ __2i16 __ocml_isnan_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_log_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_log10_2f16(__2f16);
- __attribute__((pure)) __device__ __2f16 __ocml_log2_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_rint_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_rsqrt_2f16(__2f16);
- __device__ __2f16 __ocml_sin_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_sqrt_2f16(__2f16);
- __attribute__((const)) __device__ __2f16 __ocml_trunc_2f16(__2f16);
- // float -> _Float16 conversions; the rtn/rtp/rtz infix presumably selects the
- // rounding direction (NOTE(review): confirm against the OCML documentation).
- __attribute__((const)) __device__ _Float16 __ocml_cvtrtn_f16_f32(float);
- __attribute__((const)) __device__ _Float16 __ocml_cvtrtp_f16_f32(float);
- __attribute__((const)) __device__ _Float16 __ocml_cvtrtz_f16_f32(float);
- }
- #endif // !__CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__
- // TODO: remove these once they are declared in the clang header __clang_hip_libdevice_declares.h
- extern "C"
- {
-     // Declared outside the __CLANG_HIP_RUNTIME_WRAPPER_INCLUDED__ guard so the
-     // prototypes stay visible even when the guarded declarations are skipped.
-     __attribute__((const)) __device__ _Float16 __ocml_fmax_f16(_Float16, _Float16);
-     __attribute__((const)) __device__ _Float16 __ocml_fmin_f16(_Float16, _Float16);
-     __attribute__((const)) __device__ _Float16 __ocml_cvtrtn_f16_f32(float);
-     __attribute__((const)) __device__ _Float16 __ocml_cvtrtp_f16_f32(float);
-     __attribute__((const)) __device__ _Float16 __ocml_cvtrtz_f16_f32(float);
- }
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
- #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
- #if defined(__HIPCC_RTC__)
- #define __HOST_DEVICE__ __device__
- #else
- #define __HOST_DEVICE__ __host__ __device__
- #include <hip/amd_detail/amd_hip_common.h>
- #include "hip/amd_detail/host_defines.h"
- #include <assert.h>
- #if defined(__cplusplus)
- #include <algorithm>
- #include <type_traits>
- #include <utility>
- #endif
- #endif // !defined(__HIPCC_RTC__)
- #if defined(__clang__) && defined(__HIP__)
- typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
- // Aggregate holding one _Float16 together with an unsigned short alias of
- // the same storage. `data` must remain the first union member: callers
- // brace-initialise this type with a single _Float16.
- struct __half_raw {
-     static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-     union {
-         _Float16 data;     // value view
-         unsigned short x;  // bit-pattern view
-     };
- };
- // Aggregate holding two halves, viewable either as a pair of __half_raw
- // members (x, y) or as one two-lane _Float16 vector (data).
- struct __half2_raw {
-     static_assert(sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-     union {
-         struct {
-             __half_raw x;
-             __half_raw y;
-         };
-         _Float16_2 data;
-     };
- };
- #if defined(__cplusplus)
- #if !defined(__HIPCC_RTC__)
- #include "hip_fp16_math_fwd.h"
- #include "amd_hip_vector_types.h"
- #include "host_defines.h"
- #include "amd_device_functions.h"
- #include "amd_warp_functions.h"
- #endif
- // Teach the standard trait that _Float16 is a floating-point type so the
- // Enable_if_t-constrained templates in this header accept it.
- namespace std {
-     template<>
-     struct is_floating_point<_Float16> : true_type {};
- }
- // Shorthand for std::enable_if<...>::type; yields Result (void by default)
- // when `enabled` is true and is ill-formed otherwise.
- template<bool enabled, typename Result = void>
- using Enable_if_t = typename std::enable_if<enabled, Result>::type;
- // BEGIN STRUCT __HALF
- // Half-precision scalar wrapping a _Float16.
- //
- // Conversions to and from arithmetic types are compiled only when
- // __HIP_NO_HALF_CONVERSIONS__ is not defined; arithmetic and comparison
- // operators only when __HIP_NO_HALF_OPERATORS__ is not defined.
- struct __half {
- protected:
-     // The value and its raw bit pattern share the same two bytes.
-     union {
-         static_assert(sizeof(_Float16) == sizeof(unsigned short), "");
-         _Float16 data;
-         unsigned short __x;
-     };
- public:
-     // CREATORS
-     __HOST_DEVICE__
-     __half() = default;
-     __HOST_DEVICE__
-     __half(const __half_raw& x) : data{x.data} {}
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     __HOST_DEVICE__
-     __half(decltype(data) x) : data{x} {}
-     // Any floating-point type: narrowed with a plain static_cast.
-     template<
-         typename T,
-         Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-     __HOST_DEVICE__
-     __half(T x) : data{static_cast<_Float16>(x)} {}
- #endif
-     __HOST_DEVICE__
-     __half(const __half&) = default;
-     __HOST_DEVICE__
-     __half(__half&&) = default;
-     __HOST_DEVICE__
-     ~__half() = default;
-     // CREATORS - integral conversions
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     template<
-         typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-     __HOST_DEVICE__
-     __half(T x) : data{static_cast<_Float16>(x)} {}
- #endif
-     // MANIPULATORS
-     __HOST_DEVICE__
-     __half& operator=(const __half&) = default;
-     __HOST_DEVICE__
-     __half& operator=(__half&&) = default;
-     // Assignment from __half_raw. Every cv/ref-qualified overload is marked
-     // __HOST_DEVICE__ so that all of them are usable from device code, not
-     // only the first two.
-     __HOST_DEVICE__
-     __half& operator=(const __half_raw& x)
-     {
-         data = x.data;
-         return *this;
-     }
-     __HOST_DEVICE__
-     volatile __half& operator=(const __half_raw& x) volatile
-     {
-         data = x.data;
-         return *this;
-     }
-     __HOST_DEVICE__
-     volatile __half& operator=(const volatile __half_raw& x) volatile
-     {
-         data = x.data;
-         return *this;
-     }
-     __HOST_DEVICE__
-     __half& operator=(__half_raw&& x)
-     {
-         data = x.data;
-         return *this;
-     }
-     __HOST_DEVICE__
-     volatile __half& operator=(__half_raw&& x) volatile
-     {
-         data = x.data;
-         return *this;
-     }
-     __HOST_DEVICE__
-     volatile __half& operator=(volatile __half_raw&& x) volatile
-     {
-         data = x.data;
-         return *this;
-     }
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     template<
-         typename T,
-         Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-     __HOST_DEVICE__
-     __half& operator=(T x)
-     {
-         data = static_cast<_Float16>(x);
-         return *this;
-     }
- #endif
-     // MANIPULATORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     template<
-         typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-     __device__
-     __half& operator=(T x)
-     {
-         data = static_cast<_Float16>(x);
-         return *this;
-     }
- #endif
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     __device__
-     __half& operator+=(const __half& x)
-     {
-         data += x.data;
-         return *this;
-     }
-     __device__
-     __half& operator-=(const __half& x)
-     {
-         data -= x.data;
-         return *this;
-     }
-     __device__
-     __half& operator*=(const __half& x)
-     {
-         data *= x.data;
-         return *this;
-     }
-     __device__
-     __half& operator/=(const __half& x)
-     {
-         data /= x.data;
-         return *this;
-     }
-     __device__
-     __half& operator++() { ++data; return *this; }
-     __device__
-     __half operator++(int)
-     {
-         __half tmp{*this};
-         ++*this;
-         return tmp;
-     }
-     __device__
-     __half& operator--() { --data; return *this; }
-     __device__
-     __half operator--(int)
-     {
-         __half tmp{*this};
-         --*this;
-         return tmp;
-     }
- #endif
-     // ACCESSORS
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     template<
-         typename T,
-         Enable_if_t<std::is_floating_point<T>{}>* = nullptr>
-     __HOST_DEVICE__
-     operator T() const { return data; }
- #endif
-     __HOST_DEVICE__
-     operator __half_raw() const { return __half_raw{data}; }
-     __HOST_DEVICE__
-     operator __half_raw() const volatile
-     {
-         return __half_raw{data};
-     }
- #if !defined(__HIP_NO_HALF_CONVERSIONS__)
-     template<
-         typename T, Enable_if_t<std::is_integral<T>{}>* = nullptr>
-     __HOST_DEVICE__
-     operator T() const { return data; }
- #endif
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     __device__
-     __half operator+() const { return *this; }
-     __device__
-     __half operator-() const
-     {
-         __half tmp{*this};
-         tmp.data = -tmp.data;
-         return tmp;
-     }
- #endif
-     // FRIENDS
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     friend
-     inline
-     __device__
-     __half operator+(const __half& x, const __half& y)
-     {
-         return __half{x} += y;
-     }
-     friend
-     inline
-     __device__
-     __half operator-(const __half& x, const __half& y)
-     {
-         return __half{x} -= y;
-     }
-     friend
-     inline
-     __device__
-     __half operator*(const __half& x, const __half& y)
-     {
-         return __half{x} *= y;
-     }
-     friend
-     inline
-     __device__
-     __half operator/(const __half& x, const __half& y)
-     {
-         return __half{x} /= y;
-     }
-     friend
-     inline
-     __device__
-     bool operator==(const __half& x, const __half& y)
-     {
-         return x.data == y.data;
-     }
-     friend
-     inline
-     __device__
-     bool operator!=(const __half& x, const __half& y)
-     {
-         return !(x == y);
-     }
-     friend
-     inline
-     __device__
-     bool operator<(const __half& x, const __half& y)
-     {
-         return x.data < y.data;
-     }
-     friend
-     inline
-     __device__
-     bool operator>(const __half& x, const __half& y)
-     {
-         return y.data < x.data;
-     }
-     // <= and >= compare the stored values directly. Deriving them by
-     // negating operator< would report true whenever an operand is NaN.
-     friend
-     inline
-     __device__
-     bool operator<=(const __half& x, const __half& y)
-     {
-         return x.data <= y.data;
-     }
-     friend
-     inline
-     __device__
-     bool operator>=(const __half& x, const __half& y)
-     {
-         return x.data >= y.data;
-     }
- #endif // !defined(__HIP_NO_HALF_OPERATORS__)
- };
- // END STRUCT __HALF
- // BEGIN STRUCT __HALF2
- // Pair of half-precision values stored as one two-lane _Float16 vector.
- //
- // The lanes are reachable either as the __half members x / y or as the
- // vector `data`. Arithmetic is lane-wise. The bool-returning comparisons
- // ==, <, >, <= and >= are true only when the relation holds in BOTH lanes;
- // != is the negation of ==.
- struct __half2 {
- public:
-     union {
-         static_assert(
-             sizeof(_Float16_2) == sizeof(unsigned short[2]), "");
-         struct {
-             __half x;
-             __half y;
-         };
-         _Float16_2 data;
-     };
-     // CREATORS
-     __HOST_DEVICE__
-     __half2() = default;
-     __HOST_DEVICE__
-     __half2(const __half2_raw& xx) : data{xx.data} {}
-     __HOST_DEVICE__
-     __half2(decltype(data) xx) : data{xx} {}
-     __HOST_DEVICE__
-     __half2(const __half& xx, const __half& yy)
-         :
-         data{static_cast<__half_raw>(xx).data,
-              static_cast<__half_raw>(yy).data}
-     {}
-     __HOST_DEVICE__
-     __half2(const __half2&) = default;
-     __HOST_DEVICE__
-     __half2(__half2&&) = default;
-     __HOST_DEVICE__
-     ~__half2() = default;
-     // MANIPULATORS
-     __HOST_DEVICE__
-     __half2& operator=(const __half2&) = default;
-     __HOST_DEVICE__
-     __half2& operator=(__half2&&) = default;
-     __HOST_DEVICE__
-     __half2& operator=(const __half2_raw& xx)
-     {
-         data = xx.data;
-         return *this;
-     }
-     // MANIPULATORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     __device__
-     __half2& operator+=(const __half2& xx)
-     {
-         data += xx.data;
-         return *this;
-     }
-     __device__
-     __half2& operator-=(const __half2& xx)
-     {
-         data -= xx.data;
-         return *this;
-     }
-     __device__
-     __half2& operator*=(const __half2& xx)
-     {
-         data *= xx.data;
-         return *this;
-     }
-     __device__
-     __half2& operator/=(const __half2& xx)
-     {
-         data /= xx.data;
-         return *this;
-     }
-     // Increment / decrement add or subtract 1 in both lanes.
-     __device__
-     __half2& operator++() { return *this += _Float16_2{1, 1}; }
-     __device__
-     __half2 operator++(int)
-     {
-         __half2 tmp{*this};
-         ++*this;
-         return tmp;
-     }
-     __device__
-     __half2& operator--() { return *this -= _Float16_2{1, 1}; }
-     __device__
-     __half2 operator--(int)
-     {
-         __half2 tmp{*this};
-         --*this;
-         return tmp;
-     }
- #endif
-     // ACCESSORS
-     __HOST_DEVICE__
-     operator decltype(data)() const { return data; }
-     __HOST_DEVICE__
-     operator __half2_raw() const {
-         __half2_raw r;
-         r.data = data;
-         return r;
-     }
-     // ACCESSORS - DEVICE ONLY
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     __device__
-     __half2 operator+() const { return *this; }
-     __device__
-     __half2 operator-() const
-     {
-         __half2 tmp{*this};
-         tmp.data = -tmp.data;
-         return tmp;
-     }
- #endif
-     // FRIENDS
- #if !defined(__HIP_NO_HALF_OPERATORS__)
-     friend
-     inline
-     __device__
-     __half2 operator+(const __half2& xx, const __half2& yy)
-     {
-         return __half2{xx} += yy;
-     }
-     friend
-     inline
-     __device__
-     __half2 operator-(const __half2& xx, const __half2& yy)
-     {
-         return __half2{xx} -= yy;
-     }
-     friend
-     inline
-     __device__
-     __half2 operator*(const __half2& xx, const __half2& yy)
-     {
-         return __half2{xx} *= yy;
-     }
-     friend
-     inline
-     __device__
-     __half2 operator/(const __half2& xx, const __half2& yy)
-     {
-         return __half2{xx} /= yy;
-     }
-     friend
-     inline
-     __device__
-     bool operator==(const __half2& xx, const __half2& yy)
-     {
-         auto r = xx.data == yy.data;
-         return r.x != 0 && r.y != 0;
-     }
-     friend
-     inline
-     __device__
-     bool operator!=(const __half2& xx, const __half2& yy)
-     {
-         return !(xx == yy);
-     }
-     friend
-     inline
-     __device__
-     bool operator<(const __half2& xx, const __half2& yy)
-     {
-         auto r = xx.data < yy.data;
-         return r.x != 0 && r.y != 0;
-     }
-     friend
-     inline
-     __device__
-     bool operator>(const __half2& xx, const __half2& yy)
-     {
-         return yy < xx;
-     }
-     // <= and >= evaluate the relation per lane and require it in both.
-     // Negating the all-lanes operator< instead would make `a <= b` true
-     // whenever any single lane satisfies it, and true for NaN operands.
-     friend
-     inline
-     __device__
-     bool operator<=(const __half2& xx, const __half2& yy)
-     {
-         auto r = xx.data <= yy.data;
-         return r.x != 0 && r.y != 0;
-     }
-     friend
-     inline
-     __device__
-     bool operator>=(const __half2& xx, const __half2& yy)
-     {
-         auto r = xx.data >= yy.data;
-         return r.x != 0 && r.y != 0;
-     }
- #endif // !defined(__HIP_NO_HALF_OPERATORS__)
- };
- // END STRUCT __HALF2
- namespace
- {
- // Builds a __half2 whose first lane is x and second lane is y.
- inline __HOST_DEVICE__
- __half2 make_half2(__half x, __half y)
- {
-     __half2 packed{x, y};
-     return packed;
- }
- // Returns the first (x) lane of the pair as a __half.
- inline __HOST_DEVICE__
- __half __low2half(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return __half{__half_raw{raw.data.x}};
- }
- // Returns the second (y) lane of the pair as a __half.
- inline __HOST_DEVICE__
- __half __high2half(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return __half{__half_raw{raw.data.y}};
- }
- // Replicates one __half into both lanes.
- inline __HOST_DEVICE__
- __half2 __half2half2(__half x)
- {
-     __half2 packed{x, x};
-     return packed;
- }
- // Same construction as make_half2: x goes to the first lane, y to the second.
- inline __HOST_DEVICE__
- __half2 __halves2half2(__half x, __half y)
- {
-     __half2 packed{x, y};
-     return packed;
- }
- // Both lanes of the result hold the first lane of x.
- inline __HOST_DEVICE__
- __half2 __low2half2(__half2 x)
- {
-     const _Float16 lo = static_cast<__half2_raw>(x).data.x;
-     return __half2{_Float16_2{lo, lo}};
- }
- // Both lanes of the result hold the second lane of x.
- inline __HOST_DEVICE__
- __half2 __high2half2(__half2 x)
- {
-     const _Float16 hi = static_cast<__half2_raw>(x).data.y;
-     return __half2{_Float16_2{hi, hi}};
- }
- // Result = (first lane of x, first lane of y).
- inline __HOST_DEVICE__
- __half2 __lows2half2(__half2 x, __half2 y)
- {
-     const _Float16 first = static_cast<__half2_raw>(x).data.x;
-     const _Float16 second = static_cast<__half2_raw>(y).data.x;
-     return __half2{_Float16_2{first, second}};
- }
- // Result = (second lane of x, second lane of y).
- inline __HOST_DEVICE__
- __half2 __highs2half2(__half2 x, __half2 y)
- {
-     const _Float16 first = static_cast<__half2_raw>(x).data.y;
-     const _Float16 second = static_cast<__half2_raw>(y).data.y;
-     return __half2{_Float16_2{first, second}};
- }
- // Swaps the two lanes of x.
- inline __HOST_DEVICE__
- __half2 __lowhigh2highlow(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return __half2{_Float16_2{raw.data.y, raw.data.x}};
- }
- // Bitcasts: reinterpret the 16 stored bits through the unsigned short member
- // of __half_raw; no numeric conversion of the half value takes place.
- inline __device__
- short __half_as_short(__half x)
- {
-     const __half_raw bits = static_cast<__half_raw>(x);
-     return bits.x;
- }
- inline __device__
- unsigned short __half_as_ushort(__half x)
- {
-     const __half_raw bits = static_cast<__half_raw>(x);
-     return bits.x;
- }
- inline __device__
- __half __short_as_half(short x)
- {
-     __half_raw bits;
-     bits.x = x;
-     return bits;
- }
- inline __device__
- __half __ushort_as_half(unsigned short x)
- {
-     __half_raw bits;
-     bits.x = x;
-     return bits;
- }
- // float -> half | half2
- // Narrows with a plain static_cast to _Float16.
- inline __HOST_DEVICE__
- __half __float2half(float x)
- {
-     const _Float16 narrowed = static_cast<_Float16>(x);
-     return __half_raw{narrowed};
- }
- // Identical implementation to __float2half.
- inline __HOST_DEVICE__
- __half __float2half_rn(float x)
- {
-     const _Float16 narrowed = static_cast<_Float16>(x);
-     return __half_raw{narrowed};
- }
- #if !defined(__HIPCC_RTC__)
- // TODO: the host overloads below do not honour the rounding direction in
- // their names; each one performs the same plain static_cast.
- inline __host__
- __half __float2half_rz(float x)
- {
-     const _Float16 narrowed = static_cast<_Float16>(x);
-     return __half_raw{narrowed};
- }
- inline __host__
- __half __float2half_rd(float x)
- {
-     const _Float16 narrowed = static_cast<_Float16>(x);
-     return __half_raw{narrowed};
- }
- inline __host__
- __half __float2half_ru(float x)
- {
-     const _Float16 narrowed = static_cast<_Float16>(x);
-     return __half_raw{narrowed};
- }
- #endif
- // Device overloads: each forwards to the matching __ocml_cvtrt*_f16_f32
- // conversion (rz -> rtz, rd -> rtn, ru -> rtp).
- inline __device__
- __half __float2half_rz(float x)
- {
-     const _Float16 converted = __ocml_cvtrtz_f16_f32(x);
-     return __half_raw{converted};
- }
- inline __device__
- __half __float2half_rd(float x)
- {
-     const _Float16 converted = __ocml_cvtrtn_f16_f32(x);
-     return __half_raw{converted};
- }
- inline __device__
- __half __float2half_ru(float x)
- {
-     const _Float16 converted = __ocml_cvtrtp_f16_f32(x);
-     return __half_raw{converted};
- }
- // Narrows x once and stores it in both lanes.
- inline __HOST_DEVICE__
- __half2 __float2half2_rn(float x)
- {
-     const _Float16 lane = static_cast<_Float16>(x);
-     return __half2{_Float16_2{lane, lane}};
- }
- // Narrows x into the first lane and y into the second.
- inline __HOST_DEVICE__
- __half2 __floats2half2_rn(float x, float y)
- {
-     const _Float16 first = static_cast<_Float16>(x);
-     const _Float16 second = static_cast<_Float16>(y);
-     return __half2{_Float16_2{first, second}};
- }
- // float2 flavour: forwards the two components to __floats2half2_rn.
- inline __HOST_DEVICE__
- __half2 __float22half2_rn(float2 x)
- {
-     const float first = x.x;
-     const float second = x.y;
-     return __floats2half2_rn(first, second);
- }
- // half | half2 -> float
- // Widens the stored _Float16 to float.
- inline __HOST_DEVICE__
- float __half2float(__half x)
- {
-     const __half_raw raw = static_cast<__half_raw>(x);
-     return raw.data;
- }
- // Widens the first lane of the pair.
- inline __HOST_DEVICE__
- float __low2float(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return raw.data.x;
- }
- // Widens the second lane of the pair.
- inline __HOST_DEVICE__
- float __high2float(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return raw.data.y;
- }
- // Widens both lanes and packs them with make_float2.
- inline __HOST_DEVICE__
- float2 __half22float2(__half2 x)
- {
-     const __half2_raw raw = static_cast<__half2_raw>(x);
-     return make_float2(raw.data.x, raw.data.y);
- }
- // half -> int
- // The value is first rounded to an integral half in the direction named by
- // the suffix and only then converted; a bare float-to-int conversion always
- // truncates and would make all four variants behave like _rz.
- //   _rn : __ocml_rint_f16  (round to nearest)
- //   _rz : plain conversion (truncation toward zero)
- //   _rd : __ocml_floor_f16 (toward negative infinity)
- //   _ru : __ocml_ceil_f16  (toward positive infinity)
- inline __device__
- int __half2int_rn(__half x)
- {
-     return static_cast<int>(__ocml_rint_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- int __half2int_rz(__half x)
- {
-     return static_cast<int>(static_cast<__half_raw>(x).data);
- }
- inline __device__
- int __half2int_rd(__half x)
- {
-     return static_cast<int>(__ocml_floor_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- int __half2int_ru(__half x)
- {
-     return static_cast<int>(__ocml_ceil_f16(static_cast<__half_raw>(x).data));
- }
- // int -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // go through float and the __ocml_cvtrt*_f16_f32 conversions, the same
- // helpers the device __float2half_r* overloads use. The float intermediate
- // is harmless: integers up to 2^24 in magnitude are exact in float, and
- // anything larger already exceeds the largest finite half (65504).
- inline __device__
- __half __int2half_rn(int x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __int2half_rz(int x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __int2half_rd(int x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __int2half_ru(int x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // half -> short
- // Round to an integral half in the direction named by the suffix, then
- // convert. A bare conversion truncates, which is only right for _rz.
- inline __device__
- short __half2short_rn(__half x)
- {
-     return static_cast<short>(__ocml_rint_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- short __half2short_rz(__half x)
- {
-     return static_cast<short>(static_cast<__half_raw>(x).data);
- }
- inline __device__
- short __half2short_rd(__half x)
- {
-     return static_cast<short>(__ocml_floor_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- short __half2short_ru(__half x)
- {
-     return static_cast<short>(__ocml_ceil_f16(static_cast<__half_raw>(x).data));
- }
- // short -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // widen to float (exact for every short) and use the __ocml_cvtrt*_f16_f32
- // conversions so the rounding direction matches the suffix.
- inline __device__
- __half __short2half_rn(short x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __short2half_rz(short x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __short2half_rd(short x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __short2half_ru(short x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // half -> long long
- // Round to an integral half in the direction named by the suffix, then
- // convert. A bare conversion truncates, which is only right for _rz.
- inline __device__
- long long __half2ll_rn(__half x)
- {
-     return static_cast<long long>(__ocml_rint_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- long long __half2ll_rz(__half x)
- {
-     return static_cast<long long>(static_cast<__half_raw>(x).data);
- }
- inline __device__
- long long __half2ll_rd(__half x)
- {
-     return static_cast<long long>(__ocml_floor_f16(static_cast<__half_raw>(x).data));
- }
- inline __device__
- long long __half2ll_ru(__half x)
- {
-     return static_cast<long long>(__ocml_ceil_f16(static_cast<__half_raw>(x).data));
- }
- // long long -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // go through float and the __ocml_cvtrt*_f16_f32 conversions. Integers up
- // to 2^24 in magnitude are exact in float and anything larger already
- // exceeds the largest finite half (65504), so the intermediate rounding
- // cannot change the directed result.
- inline __device__
- __half __ll2half_rn(long long x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __ll2half_rz(long long x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ll2half_rd(long long x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ll2half_ru(long long x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // half -> unsigned int
- // Round to an integral half in the direction named by the suffix, then
- // convert. Negative results are clamped to 0: rounding can turn an input in
- // (-1, 0) into -1, and converting a negative floating-point value to an
- // unsigned type is undefined in C++.
- inline __device__
- unsigned int __half2uint_rn(__half x)
- {
-     const _Float16 r = __ocml_rint_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0u : static_cast<unsigned int>(r);
- }
- inline __device__
- unsigned int __half2uint_rz(__half x)
- {
-     const _Float16 r = static_cast<__half_raw>(x).data;
-     return r < static_cast<_Float16>(0) ? 0u : static_cast<unsigned int>(r);
- }
- inline __device__
- unsigned int __half2uint_rd(__half x)
- {
-     const _Float16 r = __ocml_floor_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0u : static_cast<unsigned int>(r);
- }
- inline __device__
- unsigned int __half2uint_ru(__half x)
- {
-     const _Float16 r = __ocml_ceil_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0u : static_cast<unsigned int>(r);
- }
- // unsigned int -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // go through float and the __ocml_cvtrt*_f16_f32 conversions. Integers up
- // to 2^24 are exact in float and anything larger already exceeds the
- // largest finite half (65504), so the intermediate rounding cannot change
- // the directed result.
- inline __device__
- __half __uint2half_rn(unsigned int x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __uint2half_rz(unsigned int x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __uint2half_rd(unsigned int x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __uint2half_ru(unsigned int x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // half -> unsigned short
- // Round to an integral half in the direction named by the suffix, then
- // convert. Negative results are clamped to 0: rounding can turn an input in
- // (-1, 0) into -1, and converting a negative floating-point value to an
- // unsigned type is undefined in C++.
- inline __device__
- unsigned short __half2ushort_rn(__half x)
- {
-     const _Float16 r = __ocml_rint_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? static_cast<unsigned short>(0)
-                                         : static_cast<unsigned short>(r);
- }
- inline __device__
- unsigned short __half2ushort_rz(__half x)
- {
-     const _Float16 r = static_cast<__half_raw>(x).data;
-     return r < static_cast<_Float16>(0) ? static_cast<unsigned short>(0)
-                                         : static_cast<unsigned short>(r);
- }
- inline __device__
- unsigned short __half2ushort_rd(__half x)
- {
-     const _Float16 r = __ocml_floor_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? static_cast<unsigned short>(0)
-                                         : static_cast<unsigned short>(r);
- }
- inline __device__
- unsigned short __half2ushort_ru(__half x)
- {
-     const _Float16 r = __ocml_ceil_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? static_cast<unsigned short>(0)
-                                         : static_cast<unsigned short>(r);
- }
- // unsigned short -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // widen to float (exact for every unsigned short) and use the
- // __ocml_cvtrt*_f16_f32 conversions so the rounding direction matches the
- // suffix.
- inline __device__
- __half __ushort2half_rn(unsigned short x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __ushort2half_rz(unsigned short x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ushort2half_rd(unsigned short x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ushort2half_ru(unsigned short x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // half -> unsigned long long
- // Round to an integral half in the direction named by the suffix, then
- // convert. Negative results are clamped to 0: rounding can turn an input in
- // (-1, 0) into -1, and converting a negative floating-point value to an
- // unsigned type is undefined in C++.
- inline __device__
- unsigned long long __half2ull_rn(__half x)
- {
-     const _Float16 r = __ocml_rint_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0ull : static_cast<unsigned long long>(r);
- }
- inline __device__
- unsigned long long __half2ull_rz(__half x)
- {
-     const _Float16 r = static_cast<__half_raw>(x).data;
-     return r < static_cast<_Float16>(0) ? 0ull : static_cast<unsigned long long>(r);
- }
- inline __device__
- unsigned long long __half2ull_rd(__half x)
- {
-     const _Float16 r = __ocml_floor_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0ull : static_cast<unsigned long long>(r);
- }
- inline __device__
- unsigned long long __half2ull_ru(__half x)
- {
-     const _Float16 r = __ocml_ceil_f16(static_cast<__half_raw>(x).data);
-     return r < static_cast<_Float16>(0) ? 0ull : static_cast<unsigned long long>(r);
- }
- // unsigned long long -> half
- // _rn keeps the plain conversion (round to nearest). The directed variants
- // go through float and the __ocml_cvtrt*_f16_f32 conversions. Integers up
- // to 2^24 are exact in float and anything larger already exceeds the
- // largest finite half (65504), so the intermediate rounding cannot change
- // the directed result.
- inline __device__
- __half __ull2half_rn(unsigned long long x)
- {
-     return __half_raw{static_cast<_Float16>(x)};
- }
- inline __device__
- __half __ull2half_rz(unsigned long long x)
- {
-     return __half_raw{__ocml_cvtrtz_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ull2half_rd(unsigned long long x)
- {
-     return __half_raw{__ocml_cvtrtn_f16_f32(static_cast<float>(x))};
- }
- inline __device__
- __half __ull2half_ru(unsigned long long x)
- {
-     return __half_raw{__ocml_cvtrtp_f16_f32(static_cast<float>(x))};
- }
- // Load primitives
- // Every variant is an ordinary read through the pointer; the name suffix
- // does not change what this implementation does.
- inline __device__
- __half __ldg(const __half* ptr)
- {
-     return ptr[0];
- }
- inline __device__
- __half __ldcg(const __half* ptr)
- {
-     return ptr[0];
- }
- inline __device__
- __half __ldca(const __half* ptr)
- {
-     return ptr[0];
- }
- inline __device__
- __half __ldcs(const __half* ptr)
- {
-     return ptr[0];
- }
- inline __HOST_DEVICE__
- __half2 __ldg(const __half2* ptr)
- {
-     return ptr[0];
- }
- inline __HOST_DEVICE__
- __half2 __ldcg(const __half2* ptr)
- {
-     return ptr[0];
- }
- inline __HOST_DEVICE__
- __half2 __ldca(const __half2* ptr)
- {
-     return ptr[0];
- }
- inline __HOST_DEVICE__
- __half2 __ldcs(const __half2* ptr)
- {
-     return ptr[0];
- }
- // Relations
- // Ordered scalar comparisons: each applies the built-in _Float16 operator
- // to the two stored values.
- inline __device__
- bool __heq(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs == rhs;
- }
- inline __device__
- bool __hne(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs != rhs;
- }
- inline __device__
- bool __hle(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs <= rhs;
- }
- inline __device__
- bool __hge(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs >= rhs;
- }
- inline __device__
- bool __hlt(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs < rhs;
- }
- inline __device__
- bool __hgt(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return lhs > rhs;
- }
- // "u"-suffixed scalar comparisons: each is written as the negation of the
- // opposite ordered comparison, so it also yields true when that ordered
- // comparison is false because an operand is NaN.
- inline __device__
- bool __hequ(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs < rhs) && !(lhs > rhs);
- }
- inline __device__
- bool __hneu(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs == rhs);
- }
- inline __device__
- bool __hleu(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs > rhs);
- }
- inline __device__
- bool __hgeu(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs < rhs);
- }
- inline __device__
- bool __hltu(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs >= rhs);
- }
- inline __device__
- bool __hgtu(__half x, __half y)
- {
-     const _Float16 lhs = static_cast<__half_raw>(x).data;
-     const _Float16 rhs = static_cast<__half_raw>(y).data;
-     return !(lhs <= rhs);
- }
- // Lane-wise ordered comparisons on half pairs. The vector comparison result
- // is negated and converted to _Float16_2 before being returned as a __half2.
- inline __HOST_DEVICE__
- __half2 __heq2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs == rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- inline __HOST_DEVICE__
- __half2 __hne2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs != rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- inline __HOST_DEVICE__
- __half2 __hle2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs <= rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- inline __HOST_DEVICE__
- __half2 __hge2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs >= rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- inline __HOST_DEVICE__
- __half2 __hlt2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs < rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- inline __HOST_DEVICE__
- __half2 __hgt2(__half2 x, __half2 y)
- {
-     const _Float16_2 lhs = static_cast<__half2_raw>(x).data;
-     const _Float16_2 rhs = static_cast<__half2_raw>(y).data;
-     auto mask = lhs > rhs;
-     return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered equality: a lane is set when neither "x < y" nor "x > y"
- // holds for it. The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hequ2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs < rhs) && !(lhs > rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered not-equal: a lane is set when "x == y" does not hold for it.
- // The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hneu2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs == rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered less-or-equal: a lane is set when "x > y" does not hold for it.
- // The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hleu2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs > rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered greater-or-equal: a lane is set when "x < y" does not hold for it.
- // The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hgeu2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs < rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered less-than: a lane is set when "x >= y" does not hold for it.
- // The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hltu2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs >= rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Lane-wise unordered greater-than: a lane is set when "x <= y" does not hold for it.
- // The mask is negated and converted to _Float16_2.
- inline __HOST_DEVICE__ __half2 __hgtu2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- const auto mask = !(lhs <= rhs);
- return __builtin_convertvector(-mask, _Float16_2);
- }
- // Boolean reduction of __heq2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hbeq2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__heq2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Boolean reduction of __hne2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hbne2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hne2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Boolean reduction of __hle2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hble2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hle2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Boolean reduction of __hge2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hbge2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hge2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Boolean reduction of __hlt2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hblt2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hlt2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Boolean reduction of __hgt2: true only when both lanes of its result are non-zero.
- inline __HOST_DEVICE__ bool __hbgt2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hgt2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Unordered "both lanes equal" test.
- // Returns true only when both lanes of __hequ2(x, y) are non-zero, i.e. when in
- // each lane neither "x < y" nor "x > y" holds.
- // The reduction is built on the unordered lane-wise comparison rather than on the
- // ordered __hbeq2: __heq2 uses ==, which is false for a NaN lane, so delegating
- // to it made this function disagree with __hequ2 on NaN inputs.
- inline __HOST_DEVICE__ bool __hbequ2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hequ2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Unordered "both lanes differ" test. Same computation as __hbne2, written out
- // here: true only when both lanes of __hne2(x, y) are non-zero.
- inline __HOST_DEVICE__ bool __hbneu2(__half2 x, __half2 y) {
- auto differs = static_cast<__half2_raw>(__hne2(x, y));
- return differs.data.x != 0 && differs.data.y != 0;
- }
- // Unordered "both lanes less-or-equal" test.
- // Returns true only when both lanes of __hleu2(x, y) are non-zero, i.e. when
- // "x > y" holds in neither lane.
- // Built on the unordered __hleu2 rather than the ordered __hble2: __hle2 uses <=,
- // which is false for a NaN lane, so delegating to it made this function disagree
- // with __hleu2 on NaN inputs.
- inline __HOST_DEVICE__ bool __hbleu2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hleu2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Unordered "both lanes greater-or-equal" test.
- // Returns true only when both lanes of __hgeu2(x, y) are non-zero, i.e. when
- // "x < y" holds in neither lane.
- // Built on the unordered __hgeu2 rather than the ordered __hbge2: __hge2 uses >=,
- // which is false for a NaN lane, so delegating to it made this function disagree
- // with __hgeu2 on NaN inputs.
- inline __HOST_DEVICE__ bool __hbgeu2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hgeu2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Unordered "both lanes less-than" test.
- // Returns true only when both lanes of __hltu2(x, y) are non-zero, i.e. when
- // "x >= y" holds in neither lane.
- // Built on the unordered __hltu2 rather than the ordered __hblt2: __hlt2 uses <,
- // which is false for a NaN lane, so delegating to it made this function disagree
- // with __hltu2 on NaN inputs.
- inline __HOST_DEVICE__ bool __hbltu2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hltu2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Unordered "both lanes greater-than" test.
- // Returns true only when both lanes of __hgtu2(x, y) are non-zero, i.e. when
- // "x <= y" holds in neither lane.
- // Built on the unordered __hgtu2 rather than the ordered __hbgt2: __hgt2 uses >,
- // which is false for a NaN lane, so delegating to it made this function disagree
- // with __hgtu2 on NaN inputs.
- inline __HOST_DEVICE__ bool __hbgtu2(__half2 x, __half2 y) {
- const auto lanes = static_cast<__half2_raw>(__hgtu2(x, y)).data;
- return (lanes.x != 0) && (lanes.y != 0);
- }
- // Returns __ocml_fmax_f16 of the raw .data values of x and y, rewrapped as a __half.
- inline __device__ __half __hmax(const __half x, const __half y) {
- const auto lhs = static_cast<__half_raw>(x).data;
- const auto rhs = static_cast<__half_raw>(y).data;
- return __half_raw{__ocml_fmax_f16(lhs, rhs)};
- }
- // NaN-propagating maximum: if x is NaN it is returned as-is; otherwise if y is
- // NaN, y is returned; only when neither is NaN is __hmax(x, y) evaluated.
- inline __device__ __half __hmax_nan(const __half x, const __half y) {
- if (__ocml_isnan_f16(static_cast<__half_raw>(x).data)) return x;
- if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) return y;
- return __hmax(x, y);
- }
- // Returns __ocml_fmin_f16 of the raw .data values of x and y, rewrapped as a __half.
- inline __device__ __half __hmin(const __half x, const __half y) {
- const auto lhs = static_cast<__half_raw>(x).data;
- const auto rhs = static_cast<__half_raw>(y).data;
- return __half_raw{__ocml_fmin_f16(lhs, rhs)};
- }
- // NaN-propagating minimum: if x is NaN it is returned as-is; otherwise if y is
- // NaN, y is returned; only when neither is NaN is __hmin(x, y) evaluated.
- inline __device__ __half __hmin_nan(const __half x, const __half y) {
- if (__ocml_isnan_f16(static_cast<__half_raw>(x).data)) return x;
- if (__ocml_isnan_f16(static_cast<__half_raw>(y).data)) return y;
- return __hmin(x, y);
- }
- // Arithmetic
- // Clamps x to [0, 1]: returns 0 when __hlt(x, 0) holds, 1 when __hlt(1, x)
- // holds, and otherwise the __half_raw view of x unchanged.
- // NOTE(review): a NaN input presumably fails both __hlt tests and is returned
- // as-is -- confirm against the definition of __hlt.
- inline __device__ __half __clamp_01(__half x) {
- auto unchanged = static_cast<__half_raw>(x);
- const bool below_zero = __hlt(x, __half_raw{0});
- if (below_zero) { return __half_raw{0}; }
- const bool above_one = __hlt(__half_raw{1}, x);
- if (above_one) { return __half_raw{1}; }
- return unchanged;
- }
- // Half addition: x + y computed on the raw .data values.
- inline __device__ __half __hadd(__half x, __half y) {
- const auto lhs = static_cast<__half_raw>(x).data;
- const auto rhs = static_cast<__half_raw>(y).data;
- return __half_raw{lhs + rhs};
- }
- // Returns __ocml_fabs_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half __habs(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_fabs_f16(raw)};
- }
- // Half subtraction: x - y computed on the raw .data values.
- inline __device__ __half __hsub(__half x, __half y) {
- const auto lhs = static_cast<__half_raw>(x).data;
- const auto rhs = static_cast<__half_raw>(y).data;
- return __half_raw{lhs - rhs};
- }
- // Half multiplication: x * y computed on the raw .data values.
- inline __device__ __half __hmul(__half x, __half y) {
- const auto lhs = static_cast<__half_raw>(x).data;
- const auto rhs = static_cast<__half_raw>(y).data;
- return __half_raw{lhs * rhs};
- }
- // Saturating add: __hadd(x, y) passed through __clamp_01.
- inline __device__ __half __hadd_sat(__half x, __half y) {
- const __half sum = __hadd(x, y);
- return __clamp_01(sum);
- }
- // Saturating subtract: __hsub(x, y) passed through __clamp_01.
- inline __device__ __half __hsub_sat(__half x, __half y) {
- const __half difference = __hsub(x, y);
- return __clamp_01(difference);
- }
- // Saturating multiply: __hmul(x, y) passed through __clamp_01.
- inline __device__ __half __hmul_sat(__half x, __half y) {
- const __half product = __hmul(x, y);
- return __clamp_01(product);
- }
- // Returns __ocml_fma_f16 applied to the raw .data values of x, y and z (in that
- // argument order), rewrapped as a __half.
- inline __device__ __half __hfma(__half x, __half y, __half z) {
- const auto a = static_cast<__half_raw>(x).data;
- const auto b = static_cast<__half_raw>(y).data;
- const auto c = static_cast<__half_raw>(z).data;
- return __half_raw{__ocml_fma_f16(a, b, c)};
- }
- // Saturating fused multiply-add: __hfma(x, y, z) passed through __clamp_01.
- inline __device__ __half __hfma_sat(__half x, __half y, __half z) {
- const __half fused = __hfma(x, y, z);
- return __clamp_01(fused);
- }
- // Half division: x / y computed on the raw .data values.
- inline __device__ __half __hdiv(__half x, __half y) {
- const auto numerator = static_cast<__half_raw>(x).data;
- const auto denominator = static_cast<__half_raw>(y).data;
- return __half_raw{numerator / denominator};
- }
- // Lane-wise addition of the __half2_raw .data vectors of x and y.
- inline __HOST_DEVICE__ __half2 __hadd2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- return __half2{lhs + rhs};
- }
- // Returns __ocml_fabs_2f16 of x's __half2_raw .data vector, wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 __habs2(__half2 x) {
- const auto raw = static_cast<__half2_raw>(x).data;
- return __half2{__ocml_fabs_2f16(raw)};
- }
- // Lane-wise subtraction of the __half2_raw .data vectors of x and y.
- inline __HOST_DEVICE__ __half2 __hsub2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- return __half2{lhs - rhs};
- }
- // Lane-wise multiplication of the __half2_raw .data vectors of x and y.
- inline __HOST_DEVICE__ __half2 __hmul2(__half2 x, __half2 y) {
- const auto lhs = static_cast<__half2_raw>(x).data;
- const auto rhs = static_cast<__half2_raw>(y).data;
- return __half2{lhs * rhs};
- }
- // Saturating lane-wise add: each lane of __hadd2(x, y) is passed through
- // __clamp_01 and the two clamped halves are packed into a __half2.
- inline __HOST_DEVICE__ __half2 __hadd2_sat(__half2 x, __half2 y) {
- auto sum = static_cast<__half2_raw>(__hadd2(x, y));
- const __half cx = __clamp_01(__half_raw{sum.data.x});
- const __half cy = __clamp_01(__half_raw{sum.data.y});
- return __half2{cx, cy};
- }
- // Saturating lane-wise subtract: each lane of __hsub2(x, y) is passed through
- // __clamp_01 and the two clamped halves are packed into a __half2.
- inline __HOST_DEVICE__ __half2 __hsub2_sat(__half2 x, __half2 y) {
- auto difference = static_cast<__half2_raw>(__hsub2(x, y));
- const __half cx = __clamp_01(__half_raw{difference.data.x});
- const __half cy = __clamp_01(__half_raw{difference.data.y});
- return __half2{cx, cy};
- }
- // Saturating lane-wise multiply: each lane of __hmul2(x, y) is passed through
- // __clamp_01 and the two clamped halves are packed into a __half2.
- inline __HOST_DEVICE__ __half2 __hmul2_sat(__half2 x, __half2 y) {
- auto product = static_cast<__half2_raw>(__hmul2(x, y));
- const __half cx = __clamp_01(__half_raw{product.data.x});
- const __half cy = __clamp_01(__half_raw{product.data.y});
- return __half2{cx, cy};
- }
- // Returns __ocml_fma_2f16(x, y, z) wrapped as a __half2. The __half2 arguments
- // are handed to the OCML routine directly, relying on their implicit conversion.
- inline __HOST_DEVICE__ __half2 __hfma2(__half2 x, __half2 y, __half2 z) {
- const auto fused = __ocml_fma_2f16(x, y, z);
- return __half2{fused};
- }
- // Saturating lane-wise fused multiply-add: each lane of __hfma2(x, y, z) is
- // passed through __clamp_01 and the two clamped halves are packed into a __half2.
- inline __HOST_DEVICE__ __half2 __hfma2_sat(__half2 x, __half2 y, __half2 z) {
- auto fused = static_cast<__half2_raw>(__hfma2(x, y, z));
- const __half cx = __clamp_01(__half_raw{fused.data.x});
- const __half cy = __clamp_01(__half_raw{fused.data.y});
- return __half2{cx, cy};
- }
- // Lane-wise division of the __half2_raw .data vectors: x / y.
- inline __HOST_DEVICE__ __half2 __h2div(__half2 x, __half2 y) {
- const auto numerator = static_cast<__half2_raw>(x).data;
- const auto denominator = static_cast<__half2_raw>(y).data;
- return __half2{numerator / denominator};
- }
- // Math functions
- #if defined(__clang__) && defined(__HIP__)
- // __half2 overload of amd_mixed_dot: forwards the raw .data vectors of a and b,
- // together with the float accumulator c and the saturate flag, to __ockl_fdot2.
- inline __device__ float amd_mixed_dot(__half2 a, __half2 b, float c, bool saturate) {
- const auto va = static_cast<__half2_raw>(a).data;
- const auto vb = static_cast<__half2_raw>(b).data;
- return __ockl_fdot2(va, vb, c, saturate);
- }
- #endif
- // Returns __ocml_trunc_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half htrunc(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_trunc_f16(raw)};
- }
- // Returns __ocml_ceil_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hceil(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_ceil_f16(raw)};
- }
- // Returns __ocml_floor_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hfloor(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_floor_f16(raw)};
- }
- // Returns __ocml_rint_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hrint(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_rint_f16(raw)};
- }
- // Returns __ocml_sin_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hsin(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_sin_f16(raw)};
- }
- // Returns __ocml_cos_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hcos(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_cos_f16(raw)};
- }
- // Returns __ocml_exp_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hexp(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_exp_f16(raw)};
- }
- // Returns __ocml_exp2_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hexp2(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_exp2_f16(raw)};
- }
- // Returns __ocml_exp10_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hexp10(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_exp10_f16(raw)};
- }
- // Returns __ocml_log2_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hlog2(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_log2_f16(raw)};
- }
- // Returns __ocml_log_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hlog(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_log_f16(raw)};
- }
- // Returns __ocml_log10_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hlog10(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_log10_f16(raw)};
- }
- // Reciprocal: 1 (as a _Float16) divided by x's raw .data value.
- inline __device__ __half hrcp(__half x) {
- const auto one = static_cast<_Float16>(1.0f);
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{one / raw};
- }
- // Returns __ocml_rsqrt_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hrsqrt(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_rsqrt_f16(raw)};
- }
- // Returns __ocml_sqrt_f16 of x's raw .data value, rewrapped as a __half.
- inline __device__ __half hsqrt(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{__ocml_sqrt_f16(raw)};
- }
- // Returns the result of __ocml_isinf_f16 on x's raw .data value, converted to bool.
- inline __device__ bool __hisinf(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __ocml_isinf_f16(raw);
- }
- // Returns the result of __ocml_isnan_f16 on x's raw .data value, converted to bool.
- inline __device__ bool __hisnan(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __ocml_isnan_f16(raw);
- }
- // Negation: unary minus applied to x's raw .data value.
- inline __device__ __half __hneg(__half x) {
- const auto raw = static_cast<__half_raw>(x).data;
- return __half_raw{-raw};
- }
- // Returns __ocml_trunc_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2trunc(__half2 x) {
- const auto result = __ocml_trunc_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_ceil_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2ceil(__half2 x) {
- const auto result = __ocml_ceil_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_floor_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2floor(__half2 x) {
- const auto result = __ocml_floor_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_rint_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2rint(__half2 x) {
- const auto result = __ocml_rint_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_sin_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2sin(__half2 x) {
- const auto result = __ocml_sin_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_cos_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2cos(__half2 x) {
- const auto result = __ocml_cos_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_exp_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2exp(__half2 x) {
- const auto result = __ocml_exp_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_exp2_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2exp2(__half2 x) {
- const auto result = __ocml_exp2_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_exp10_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2exp10(__half2 x) {
- const auto result = __ocml_exp10_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_log2_2f16(x) wrapped as a __half2.
- inline __HOST_DEVICE__ __half2 h2log2(__half2 x) {
- const auto result = __ocml_log2_2f16(x);
- return __half2{result};
- }
- // Returns __ocml_log_2f16(x); the result is converted implicitly to __half2.
- inline __HOST_DEVICE__ __half2 h2log(__half2 x)
- {
- return __ocml_log_2f16(x);
- }
- // Returns __ocml_log10_2f16(x); the result is converted implicitly to __half2.
- inline __HOST_DEVICE__ __half2 h2log10(__half2 x)
- {
- return __ocml_log10_2f16(x);
- }
- // Lane-wise reciprocal: a vector of two _Float16 ones divided by x.data.
- inline __HOST_DEVICE__ __half2 h2rcp(__half2 x) {
- const _Float16_2 ones{static_cast<_Float16>(1.0f), static_cast<_Float16>(1.0f)};
- return _Float16_2{ones / x.data};
- }
- // Returns __ocml_rsqrt_2f16(x); the result is converted implicitly to __half2.
- inline __HOST_DEVICE__ __half2 h2rsqrt(__half2 x)
- {
- return __ocml_rsqrt_2f16(x);
- }
- // Returns __ocml_sqrt_2f16(x); the result is converted implicitly to __half2.
- inline __HOST_DEVICE__ __half2 h2sqrt(__half2 x)
- {
- return __ocml_sqrt_2f16(x);
- }
- // Calls __ocml_isinf_2f16(x) and converts each of its two result lanes to
- // _Float16, returning them packed as a __half2.
- inline __HOST_DEVICE__ __half2 __hisinf2(__half2 x) {
- const auto flags = __ocml_isinf_2f16(x);
- const auto fx = static_cast<_Float16>(flags.x);
- const auto fy = static_cast<_Float16>(flags.y);
- return __half2{_Float16_2{fx, fy}};
- }
- // Calls __ocml_isnan_2f16(x) and converts each of its two result lanes to
- // _Float16, returning them packed as a __half2.
- inline __HOST_DEVICE__ __half2 __hisnan2(__half2 x) {
- const auto flags = __ocml_isnan_2f16(x);
- const auto fx = static_cast<_Float16>(flags.x);
- const auto fy = static_cast<_Float16>(flags.y);
- return __half2{_Float16_2{fx, fy}};
- }
- // Lane-wise negation: unary minus applied to x's __half2_raw .data vector.
- inline __HOST_DEVICE__ __half2 __hneg2(__half2 x) {
- const auto raw = static_cast<__half2_raw>(x).data;
- return __half2{-raw};
- }
- } // Anonymous namespace.
- #if !defined(HIP_NO_HALF) // the aliases below are omitted when HIP_NO_HALF is defined
- using half = __half; // unprefixed alias for __half
- using half2 = __half2; // unprefixed alias for __half2
- #endif
- // __half overload of __shfl: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl, and the __half member is read back.
- __device__ inline __half __shfl(__half var, int src_lane, int width = warpSize) {
- union { int word; __half val; } pun;
- pun.val = var;
- pun.word = __shfl(pun.word, src_lane, width);
- return pun.val;
- }
- // __half2 overload of __shfl: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl, and the __half2 member is read back.
- __device__ inline __half2 __shfl(__half2 var, int src_lane, int width = warpSize) {
- union { int word; __half2 val; } pun;
- pun.val = var;
- pun.word = __shfl(pun.word, src_lane, width);
- return pun.val;
- }
- // __half overload of __shfl_up: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_up, and the __half member is read back.
- __device__ inline __half __shfl_up(__half var, unsigned int lane_delta, int width = warpSize) {
- union { int word; __half val; } pun;
- pun.val = var;
- pun.word = __shfl_up(pun.word, lane_delta, width);
- return pun.val;
- }
- // __half2 overload of __shfl_up: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_up, and the __half2 member is read back.
- __device__ inline __half2 __shfl_up(__half2 var, unsigned int lane_delta, int width = warpSize) {
- union { int word; __half2 val; } pun;
- pun.val = var;
- pun.word = __shfl_up(pun.word, lane_delta, width);
- return pun.val;
- }
- // __half overload of __shfl_down: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_down, and the __half member is read back.
- __device__ inline __half __shfl_down(__half var, unsigned int lane_delta, int width = warpSize) {
- union { int word; __half val; } pun;
- pun.val = var;
- pun.word = __shfl_down(pun.word, lane_delta, width);
- return pun.val;
- }
- // __half2 overload of __shfl_down: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_down, and the __half2 member is read back.
- __device__ inline __half2 __shfl_down(__half2 var, unsigned int lane_delta, int width = warpSize) {
- union { int word; __half2 val; } pun;
- pun.val = var;
- pun.word = __shfl_down(pun.word, lane_delta, width);
- return pun.val;
- }
- // __half overload of __shfl_xor: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_xor, and the __half member is read back.
- __device__ inline __half __shfl_xor(__half var, int lane_mask, int width = warpSize) {
- union { int word; __half val; } pun;
- pun.val = var;
- pun.word = __shfl_xor(pun.word, lane_mask, width);
- return pun.val;
- }
- // __half2 overload of __shfl_xor: the value is stored into a union that also holds an
- // int, the int member is passed through __shfl_xor, and the __half2 member is read back.
- __device__ inline __half2 __shfl_xor(__half2 var, int lane_mask, int width = warpSize) {
- union { int word; __half2 val; } pun;
- pun.val = var;
- pun.word = __shfl_xor(pun.word, lane_mask, width);
- return pun.val;
- }
- #endif // defined(__cplusplus)
- #elif defined(__GNUC__)
- #if !defined(__HIPCC_RTC__)
- #include "hip_fp16_gcc.h"
- #endif
- #endif // !defined(__clang__) && defined(__GNUC__)
- #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H
- /*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
- #pragma once
- #if !defined(__HIPCC_RTC__)
- #include "hip_fp16_math_fwd.h"
- #include "amd_hip_vector_types.h"
- #include "math_fwd.h"
- #include <hip/amd_detail/host_defines.h>
- #include <algorithm>
- // assert.h is only for the host version of assert.
- // The device version of assert is implemented in hip/amd_detail/hip_runtime.h.
- // Users should include hip_runtime.h for the device version of assert.
- #if !__HIP_DEVICE_COMPILE__
- #include <assert.h>
- #endif
- #include <limits.h>
- #include <limits>
- #include <stdint.h>
- #endif // !defined(__HIPCC_RTC__)
- #if _LIBCPP_VERSION && __HIP__
- namespace std {
- // Explicit specialization of __numeric_type for _Float16, compiled only when
- // _LIBCPP_VERSION and __HIP__ are both non-zero. It exposes _Float16 as `type`,
- // sets `value` to true, and declares a __test overload taking _Float16.
- template <>
- struct __numeric_type<_Float16> {
- using type = _Float16;
- static const bool value = true;
- static type __test(_Float16);
- };
- } // namespace std
- #endif // _LIBCPP_VERSION && __HIP__
- #pragma push_macro("__DEVICE__") // save any existing __DEVICE__ definition
- #pragma push_macro("__RETURN_TYPE") // save any existing __RETURN_TYPE definition
- #define __DEVICE__ static __device__ // qualifier prefix used by the amd_mixed_dot overloads that follow
- #define __RETURN_TYPE bool // not referenced before the matching pop_macro in this section
- // DOT FUNCTIONS
- #if __HIP_CLANG_ONLY__
- // short2 overload: forwards a.data, b.data, the accumulator c and the saturate
- // flag to __ockl_sdot2 and returns its result.
- __DEVICE__ inline int amd_mixed_dot(short2 a, short2 b, int c, bool saturate)
- {
- return __ockl_sdot2(a.data, b.data, c, saturate);
- }
- // ushort2 overload: forwards a.data, b.data, the accumulator c and the saturate
- // flag to __ockl_udot2 and returns its result.
- __DEVICE__ inline uint amd_mixed_dot(ushort2 a, ushort2 b, uint c, bool saturate)
- {
- return __ockl_udot2(a.data, b.data, c, saturate);
- }
- // char4 overload: forwards a.data, b.data, the accumulator c and the saturate
- // flag to __ockl_sdot4 and returns its result.
- __DEVICE__ inline int amd_mixed_dot(char4 a, char4 b, int c, bool saturate)
- {
- return __ockl_sdot4(a.data, b.data, c, saturate);
- }
- // uchar4 overload: forwards a.data, b.data, the accumulator c and the saturate
- // flag to __ockl_udot4 and returns its result.
- __DEVICE__ inline uint amd_mixed_dot(uchar4 a, uchar4 b, uint c, bool saturate)
- {
- return __ockl_udot4(a.data, b.data, c, saturate);
- }
- // int overload: forwards a, b, the accumulator c and the saturate flag to
- // __ockl_sdot8 and returns its result.
- __DEVICE__ inline int amd_mixed_dot(int a, int b, int c, bool saturate)
- {
- return __ockl_sdot8(a, b, c, saturate);
- }
- // uint overload: forwards a, b, the accumulator c and the saturate flag to
- // __ockl_udot8 and returns its result.
- __DEVICE__ inline uint amd_mixed_dot(uint a, uint b, uint c, bool saturate)
- {
- return __ockl_udot8(a, b, c, saturate);
- }
- #endif
- #pragma pop_macro("__DEVICE__") // restore the __DEVICE__ definition saved by the earlier push_macro
- #pragma pop_macro("__RETURN_TYPE") // restore the __RETURN_TYPE definition saved by the earlier push_macro
- // For backward compatibility.
- // There are HIP applications e.g. TensorFlow, expecting __HIP_ARCH_* macros
- // defined after including math_functions.h.
- #if !defined(__HIPCC_RTC__)
- #include <hip/amd_detail/amd_hip_runtime.h>
- #endif
|