;
33using namespaceCodeGen;
36constexpr unsignedCudaFatMagic = 0x466243b1;
37constexpr unsignedHIPFatMagic = 0x48495046;
44StringRef SectionPrefix;
47llvm::IntegerType *IntTy, *SizeTy;
49llvm::PointerType *PtrTy;
52llvm::LLVMContext &Context;
54llvm::Module &TheModule;
64llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
66llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
68llvm::GlobalVariable *Var;
76llvm::GlobalVariable *GpuBinaryHandle =
nullptr;
78 boolRelocatableDeviceCode;
80std::unique_ptr<MangleContext> DeviceMC;
82llvm::FunctionCallee getSetupArgumentFn()
const;
83llvm::FunctionCallee getLaunchFn()
const;
85llvm::FunctionType *getRegisterGlobalsFnTy()
const;
86llvm::FunctionType *getCallbackFnTy()
const;
87llvm::FunctionType *getRegisterLinkedBinaryFnTy()
const;
88std::string addPrefixToName(StringRef FuncName)
const;
89std::string addUnderscoredPrefixToName(StringRef FuncName)
const;
92llvm::Function *makeRegisterGlobalsFn();
97llvm::Constant *makeConstantString(
conststd::string &Str,
98 conststd::string &Name =
"") {
99 returnCGM.GetAddrOfConstantCString(Str, Name.c_str()).getPointer();
105llvm::Constant *makeConstantArray(StringRef Str,
106StringRef Name =
"",
107StringRef SectionName =
"",
108 unsignedAlignment = 0,
109 boolAddNull =
false) {
110llvm::Constant *
Value=
111llvm::ConstantDataArray::getString(Context, Str, AddNull);
112 auto*GV =
newllvm::GlobalVariable(
114llvm::GlobalValue::PrivateLinkage,
Value, Name);
115 if(!SectionName.empty()) {
116GV->setSection(SectionName);
119GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
122GV->setAlignment(llvm::Align(Alignment));
127llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
128assert(FnTy->getReturnType()->isVoidTy() &&
129 "Can only generate dummy functions returning void!");
130llvm::Function *DummyFunc = llvm::Function::Create(
131FnTy, llvm::GlobalValue::InternalLinkage,
"dummy", &TheModule);
133llvm::BasicBlock *DummyBlock =
134llvm::BasicBlock::Create(Context,
"", DummyFunc);
136FuncBuilder.SetInsertPoint(DummyBlock);
137FuncBuilder.CreateRetVoid();
149 voidregisterDeviceVar(
const VarDecl*VD, llvm::GlobalVariable &Var,
150 boolExtern,
boolConstant) {
151DeviceVars.push_back({&Var,
154VD->hasAttr<HIPManagedAttr>(),
157 voidregisterDeviceSurf(
const VarDecl*VD, llvm::GlobalVariable &Var,
158 boolExtern,
int Type) {
159DeviceVars.push_back({&Var,
165 voidregisterDeviceTex(
const VarDecl*VD, llvm::GlobalVariable &Var,
166 boolExtern,
int Type,
boolNormalized) {
167DeviceVars.push_back({&Var,
170 false, Normalized,
Type}});
174llvm::Function *makeModuleCtorFunction();
176llvm::Function *makeModuleDtorFunction();
178 voidtransformManagedVars();
180 voidcreateOffloadingEntries();
186llvm::Function *
getKernelStub(llvm::GlobalValue *Handle)
override{
187 auto Loc= KernelStubs.find(Handle);
188assert(
Loc!= KernelStubs.end());
193llvm::GlobalVariable &Var)
override;
196llvm::GlobalValue::LinkageTypes &
Linkage)
override;
203std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName)
const{
204 return(Prefix + FuncName).str();
207CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName)
const{
208 return(
"__"+ Prefix + FuncName).str();
218 returnstd::unique_ptr<MangleContext>(
229TheModule(CGM.getModule()),
230RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
239SectionPrefix =
"omp";
241SectionPrefix = Prefix =
"hip";
243SectionPrefix = Prefix =
"cuda";
246llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn()
const{
248llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
250llvm::FunctionType::get(IntTy, Params,
false),
251addPrefixToName(
"SetupArgument"));
254llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn()
const{
258llvm::FunctionType::get(IntTy, PtrTy,
false),
"hipLaunchByPtr");
265llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy()
const{
266 returnllvm::FunctionType::get(VoidTy, PtrTy,
false);
269llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy()
const{
270 returnllvm::FunctionType::get(VoidTy, PtrTy,
false);
273llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy()
const{
274llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
275llvm::PointerType::getUnqual(Context)};
276 returnllvm::FunctionType::get(VoidTy, Params,
false);
279std::string CGNVCUDARuntime::getDeviceSideName(
const NamedDecl*ND) {
282 if(
auto*FD = dyn_cast<FunctionDecl>(ND))
283GD =
GlobalDecl(FD, KernelReferenceKind::Kernel);
286std::string DeviceSideName;
294llvm::raw_svector_ostream Out(Buffer);
296DeviceSideName = std::string(Out.str());
304llvm::raw_svector_ostream Out(Buffer);
305Out << DeviceSideName;
307DeviceSideName = std::string(Out.str());
309 returnDeviceSideName;
316dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.
CurFn->getName()])) {
317GV->setLinkage(CGF.
CurFn->getLinkage());
318GV->setInitializer(CGF.
CurFn);
321CudaFeature::CUDA_USES_NEW_LAUNCH) ||
324emitDeviceStubBodyNew(CGF, Args);
326emitDeviceStubBodyLegacy(CGF, Args);
339 for(
auto&Arg : Args)
341llvm::StructType *KernelArgsTy = llvm::StructType::create(ArgTypes);
343 auto*Int64Ty = CGF.
Builder.getInt64Ty();
344KernelLaunchParamsTypes.push_back(Int64Ty);
345KernelLaunchParamsTypes.push_back(PtrTy);
346KernelLaunchParamsTypes.push_back(PtrTy);
348llvm::StructType *KernelLaunchParamsTy =
349llvm::StructType::create(KernelLaunchParamsTypes);
354 "kernel_launch_params");
356 autoKernelArgsSize = CGM.
getDataLayout().getTypeAllocSize(KernelArgsTy);
364 for(
unsignedi = 0; i < Args.size(); ++i) {
369 returnKernelLaunchParams;
379llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
381 for(
unsignedi = 0; i < Args.size(); ++i) {
383llvm::Value *VoidVarPtr = CGF.
Builder.CreatePointerCast(VarPtr, PtrTy);
385VoidVarPtr, CGF.
Builder.CreateConstGEP1_32(
397? prepareKernelArgsLLVMOffload(CGF, Args)
398: prepareKernelArgs(CGF, Args);
414std::string KernelLaunchAPI =
"LaunchKernel";
416LangOptions::GPUDefaultStreamKind::PerThread) {
418KernelLaunchAPI = KernelLaunchAPI +
"_spt";
420KernelLaunchAPI = KernelLaunchAPI +
"_ptsz";
422 autoLaunchKernelName = addPrefixToName(KernelLaunchAPI);
426 for(
auto*Result : DC->
lookup(&cudaLaunchKernelII)) {
428cudaLaunchKernelFD = FD;
431 if(cudaLaunchKernelFD ==
nullptr) {
433 "Can't find declaration for "+ LaunchKernelName);
447llvm::FunctionType::get(IntTy,
453addUnderscoredPrefixToName(
"PopCallConfiguration"));
462CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
478llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
482llvm::FunctionCallee cudaLaunchKernelFn =
492llvm::Function *KernelFunction = llvm::cast<llvm::Function>(
Kernel);
493std::string GlobalVarName = (KernelFunction->getName() +
".id").str();
495llvm::GlobalVariable *HandleVar =
496CGM.
getModule().getNamedGlobal(GlobalVarName);
498HandleVar =
newllvm::GlobalVariable(
500 false, KernelFunction->getLinkage(),
501llvm::ConstantInt::get(CGM.
Int8Ty, 0), GlobalVarName);
502HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
503HandleVar->setVisibility(KernelFunction->getVisibility());
504 if(KernelFunction->hasComdat())
505HandleVar->setComdat(CGM.
getModule().getOrInsertComdat(GlobalVarName));
521llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
524 for(
const VarDecl*A : Args) {
526Offset = Offset.alignTo(TInfo.Align);
527llvm::Value *Args[] = {
528CGF.
Builder.CreatePointerCast(
530llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
531llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
534llvm::Constant *
Zero= llvm::ConstantInt::get(IntTy, 0);
535llvm::Value *CBZero = CGF.
Builder.CreateICmpEQ(CB, Zero);
537CGF.
Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
539Offset += TInfo.Width;
543llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
545CGF.
Builder.CreatePointerCast(KernelHandles[CGF.
CurFn->getName()], PtrTy);
555llvm::GlobalVariable *ManagedVar) {
557 for(
auto&&VarUse : Var->uses()) {
558WorkList.push_back({VarUse.getUser()});
560 while(!WorkList.empty()) {
561 auto&&WorkItem = WorkList.pop_back_val();
562 auto*
U= WorkItem.back();
563 if(isa<llvm::ConstantExpr>(
U)) {
564 for(
auto&&UU :
U->uses()) {
565WorkItem.push_back(UU.getUser());
566WorkList.push_back(WorkItem);
571 if(
auto*I = dyn_cast<llvm::Instruction>(
U)) {
572llvm::Value *OldV = Var;
573llvm::Instruction *NewV =
newllvm::LoadInst(
574Var->getType(), ManagedVar,
"ld.managed",
false,
575llvm::Align(Var->getAlignment()), I->getIterator());
579 for(
auto&&Op : WorkItem) {
580 auto*CE = cast<llvm::ConstantExpr>(Op);
581 auto*NewInst = CE->getAsInstruction();
582NewInst->insertBefore(*I->getParent(), I->getIterator());
583NewInst->replaceUsesOfWith(OldV, NewV);
587I->replaceUsesOfWith(OldV, NewV);
589llvm_unreachable(
"Invalid use of managed variable");
608llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
610 if(EmittedKernels.empty() && DeviceVars.empty())
613llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
614getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
615addUnderscoredPrefixToName(
"_register_globals"), &TheModule);
616llvm::BasicBlock *EntryBB =
617llvm::BasicBlock::Create(Context,
"entry", RegisterKernelsFunc);
619Builder.SetInsertPoint(EntryBB);
623llvm::Type *RegisterFuncParams[] = {
624PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
625PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
627llvm::FunctionType::get(IntTy, RegisterFuncParams,
false),
628addUnderscoredPrefixToName(
"RegisterFunction"));
633llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
634 for(
auto&&I : EmittedKernels) {
635llvm::Constant *KernelName =
636makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
637llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
638llvm::Value *Args[] = {
640KernelHandles[I.Kernel->getName()],
643llvm::ConstantInt::get(IntTy, -1),
648llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Context))};
649Builder.CreateCall(RegisterFunc, Args);
652llvm::Type *VarSizeTy = IntTy;
660llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
661IntTy, VarSizeTy, IntTy, IntTy};
663llvm::FunctionType::get(VoidTy, RegisterVarParams,
false),
664addUnderscoredPrefixToName(
"RegisterVar"));
667llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
668PtrTy, VarSizeTy, IntTy};
670llvm::FunctionType::get(VoidTy, RegisterManagedVarParams,
false),
671addUnderscoredPrefixToName(
"RegisterManagedVar"));
675llvm::FunctionType::get(
676VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy},
false),
677addUnderscoredPrefixToName(
"RegisterSurface"));
681llvm::FunctionType::get(
682VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy},
false),
683addUnderscoredPrefixToName(
"RegisterTexture"));
684 for(
auto&&Info : DeviceVars) {
685llvm::GlobalVariable *Var = Info.Var;
686assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
687 "External variables should not show up here, except HIP managed " 689llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
690 switch(Info.Flags.getKind()) {
691 caseDeviceVarFlags::Variable: {
694 if(Info.Flags.isManaged()) {
695assert(Var->getName().ends_with(
".managed") &&
696 "HIP managed variables not transformed");
697 auto*ManagedVar = CGM.
getModule().getNamedGlobal(
698Var->getName().drop_back(StringRef(
".managed").size()));
699llvm::Value *Args[] = {
704llvm::ConstantInt::get(VarSizeTy, VarSize),
705llvm::ConstantInt::get(IntTy, Var->getAlignment())};
706 if(!Var->isDeclaration())
707Builder.CreateCall(RegisterManagedVar, Args);
709llvm::Value *Args[] = {
714llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
715llvm::ConstantInt::get(VarSizeTy, VarSize),
716llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
717llvm::ConstantInt::get(IntTy, 0)};
718Builder.CreateCall(RegisterVar, Args);
722 caseDeviceVarFlags::Surface:
725{&GpuBinaryHandlePtr, Var, VarName, VarName,
726llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
727llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
729 caseDeviceVarFlags::Texture:
732{&GpuBinaryHandlePtr, Var, VarName, VarName,
733llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
734llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
735llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
740Builder.CreateRetVoid();
741 returnRegisterKernelsFunc;
763llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
768 if(CudaGpuBinaryFileName.empty() && !IsHIP)
770 if((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
775llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
778 if(RelocatableDeviceCode && !RegisterGlobalsFunc)
779RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
783llvm::FunctionType::get(PtrTy, PtrTy,
false),
784addUnderscoredPrefixToName(
"RegisterFatBinary"));
786llvm::StructType *FatbinWrapperTy =
787llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);
793std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary =
nullptr;
794 if(!CudaGpuBinaryFileName.empty()) {
796 autoCudaGpuBinaryOrErr =
797 VFS->getBufferForFile(CudaGpuBinaryFileName, -1,
false);
798 if(std::error_code EC = CudaGpuBinaryOrErr.getError()) {
800<< CudaGpuBinaryFileName << EC.message();
803CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
806llvm::Function *ModuleCtorFunc = llvm::Function::Create(
807llvm::FunctionType::get(VoidTy,
false),
808llvm::GlobalValue::InternalLinkage,
809addUnderscoredPrefixToName(
"_module_ctor"), &TheModule);
810llvm::BasicBlock *CtorEntryBB =
811llvm::BasicBlock::Create(Context,
"entry", ModuleCtorFunc);
814CtorBuilder.SetInsertPoint(CtorEntryBB);
816 const char*FatbinConstantName;
817 const char*FatbinSectionName;
818 const char*ModuleIDSectionName;
819StringRef ModuleIDPrefix;
820llvm::Constant *FatBinStr;
823FatbinConstantName =
".hip_fatbin";
824FatbinSectionName =
".hipFatBinSegment";
826ModuleIDSectionName =
"__hip_module_id";
827ModuleIDPrefix =
"__hip_";
832 const unsignedHIPCodeObjectAlign = 4096;
833FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
834FatbinConstantName, HIPCodeObjectAlign);
840FatBinStr =
newllvm::GlobalVariable(
842 true, llvm::GlobalValue::ExternalLinkage,
nullptr,
846 nullptr, llvm::GlobalVariable::NotThreadLocal);
847cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
850FatMagic = HIPFatMagic;
852 if(RelocatableDeviceCode)
853FatbinConstantName = CGM.
getTriple().isMacOSX()
854?
"__NV_CUDA,__nv_relfatbin" 858CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__nv_fatbin":
".nv_fatbin";
861CGM.
getTriple().isMacOSX() ?
"__NV_CUDA,__fatbin":
".nvFatBinSegment";
863ModuleIDSectionName = CGM.
getTriple().isMacOSX()
864?
"__NV_CUDA,__nv_module_id" 866ModuleIDPrefix =
"__nv_";
870FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()),
"",
871FatbinConstantName, 8);
872FatMagic = CudaFatMagic;
877 autoValues = Builder.beginStruct(FatbinWrapperTy);
879Values.addInt(IntTy, FatMagic);
881Values.addInt(IntTy, 1);
883Values.add(FatBinStr);
885Values.add(llvm::ConstantPointerNull::get(PtrTy));
886llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
887addUnderscoredPrefixToName(
"_fatbin_wrapper"), CGM.
getPointerAlign(),
889FatbinWrapper->setSection(FatbinSectionName);
899 auto Linkage= RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
900: llvm::GlobalValue::InternalLinkage;
901llvm::BasicBlock *IfBlock =
902llvm::BasicBlock::Create(Context,
"if", ModuleCtorFunc);
903llvm::BasicBlock *ExitBlock =
904llvm::BasicBlock::Create(Context,
"exit", ModuleCtorFunc);
907GpuBinaryHandle =
newllvm::GlobalVariable(
908TheModule, PtrTy,
false,
Linkage,
910!RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
917 if(
Linkage!= llvm::GlobalValue::InternalLinkage)
918GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
920GpuBinaryHandle, PtrTy,
923 auto*HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
924llvm::Constant *
Zero=
925llvm::Constant::getNullValue(HandleValue->getType());
926llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
927CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
930CtorBuilder.SetInsertPoint(IfBlock);
932llvm::CallInst *RegisterFatbinCall =
933CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
934CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
935CtorBuilder.CreateBr(ExitBlock);
938CtorBuilder.SetInsertPoint(ExitBlock);
940 if(RegisterGlobalsFunc) {
941 auto*HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
942CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
945}
else if(!RelocatableDeviceCode) {
949llvm::CallInst *RegisterFatbinCall =
950CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
951GpuBinaryHandle =
newllvm::GlobalVariable(
952TheModule, PtrTy,
false, llvm::GlobalValue::InternalLinkage,
953llvm::ConstantPointerNull::get(PtrTy),
"__cuda_gpubin_handle");
955CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
959 if(RegisterGlobalsFunc)
960CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
964CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
967llvm::FunctionType::get(VoidTy, PtrTy,
false),
968 "__cudaRegisterFatBinaryEnd");
969CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
974llvm::raw_svector_ostream OS(ModuleID);
975OS << ModuleIDPrefix << llvm::format(
"%"PRIx64, FatbinWrapper->getGUID());
976llvm::Constant *ModuleIDConstant = makeConstantArray(
977std::string(ModuleID),
"", ModuleIDSectionName, 32,
true);
980llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
981Twine(
"__fatbinwrap") + ModuleID, FatbinWrapper);
986RegisterLinkedBinaryName += ModuleID;
988getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
990assert(RegisterGlobalsFunc &&
"Expecting at least dummy function!");
991llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
992makeDummyFunction(getCallbackFnTy())};
993CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
999 if(llvm::Function *CleanupFn = makeModuleDtorFunction()) {
1001llvm::FunctionType *AtExitTy =
1002llvm::FunctionType::get(IntTy, CleanupFn->getType(),
false);
1003llvm::FunctionCallee AtExitFunc =
1006CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
1009CtorBuilder.CreateRetVoid();
1010 returnModuleCtorFunc;
1032llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
1034 if(!GpuBinaryHandle)
1039llvm::FunctionType::get(VoidTy, PtrTy,
false),
1040addUnderscoredPrefixToName(
"UnregisterFatBinary"));
1042llvm::Function *ModuleDtorFunc = llvm::Function::Create(
1043llvm::FunctionType::get(VoidTy,
false),
1044llvm::GlobalValue::InternalLinkage,
1045addUnderscoredPrefixToName(
"_module_dtor"), &TheModule);
1047llvm::BasicBlock *DtorEntryBB =
1048llvm::BasicBlock::Create(Context,
"entry", ModuleDtorFunc);
1050DtorBuilder.SetInsertPoint(DtorEntryBB);
1053GpuBinaryHandle, GpuBinaryHandle->getValueType(),
1055 auto*HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
1060llvm::BasicBlock *IfBlock =
1061llvm::BasicBlock::Create(Context,
"if", ModuleDtorFunc);
1062llvm::BasicBlock *ExitBlock =
1063llvm::BasicBlock::Create(Context,
"exit", ModuleDtorFunc);
1064llvm::Constant *
Zero= llvm::Constant::getNullValue(HandleValue->getType());
1065llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
1066DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
1068DtorBuilder.SetInsertPoint(IfBlock);
1069DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
1070DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
1071DtorBuilder.CreateBr(ExitBlock);
1073DtorBuilder.SetInsertPoint(ExitBlock);
1075DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
1077DtorBuilder.CreateRetVoid();
1078 returnModuleDtorFunc;
1082 return newCGNVCUDARuntime(CGM);
1085voidCGNVCUDARuntime::internalizeDeviceSideVar(
1102 if(
D->
hasAttr<CUDADeviceAttr>() ||
D->
hasAttr<CUDAConstantAttr>() ||
1104 D->getType()->isCUDADeviceBuiltinSurfaceType() ||
1105 D->getType()->isCUDADeviceBuiltinTextureType()) {
1106 Linkage= llvm::GlobalValue::InternalLinkage;
1110voidCGNVCUDARuntime::handleVarRegistration(
const VarDecl*
D,
1111llvm::GlobalVariable &GV) {
1112 if(
D->
hasAttr<CUDADeviceAttr>() ||
D->
hasAttr<CUDAConstantAttr>()) {
1126 if((!
D->hasExternalStorage() && !
D->isInline()) ||
1129registerDeviceVar(
D, GV, !
D->hasDefinition(),
1130 D->
hasAttr<CUDAConstantAttr>());
1132}
else if(
D->getType()->isCUDADeviceBuiltinSurfaceType() ||
1133 D->getType()->isCUDADeviceBuiltinTextureType()) {
1136 const auto*TD = cast<ClassTemplateSpecializationDecl>(
1139 if(TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
1140assert(Args.
size() == 2 &&
1141 "Unexpected number of template arguments of CUDA device " 1142 "builtin surface type.");
1143 autoSurfType = Args[1].getAsIntegral();
1144 if(!
D->hasExternalStorage())
1145registerDeviceSurf(
D, GV, !
D->hasDefinition(), SurfType.getSExtValue());
1147assert(Args.
size() == 3 &&
1148 "Unexpected number of template arguments of CUDA device " 1149 "builtin texture type.");
1150 autoTexType = Args[1].getAsIntegral();
1151 autoNormalized = Args[2].getAsIntegral();
1152 if(!
D->hasExternalStorage())
1153registerDeviceTex(
D, GV, !
D->hasDefinition(), TexType.getSExtValue(),
1154Normalized.getZExtValue());
1163voidCGNVCUDARuntime::transformManagedVars() {
1164 for(
auto&&Info : DeviceVars) {
1165llvm::GlobalVariable *Var = Info.Var;
1166 if(Info.Flags.getKind() == DeviceVarFlags::Variable &&
1167Info.Flags.isManaged()) {
1168 auto*ManagedVar =
newllvm::GlobalVariable(
1170 false, Var->getLinkage(),
1171Var->isDeclaration()
1173: llvm::ConstantPointerNull::get(Var->getType()),
1175llvm::GlobalVariable::NotThreadLocal,
1177? LangAS::cuda_device
1178: LangAS::Default));
1179ManagedVar->setDSOLocal(Var->isDSOLocal());
1180ManagedVar->setVisibility(Var->getVisibility());
1181ManagedVar->setExternallyInitialized(
true);
1183ManagedVar->takeName(Var);
1184Var->setName(Twine(ManagedVar->getName()) +
".managed");
1187 if(CGM.
getLangOpts().CUDAIsDevice && !Var->isDeclaration()) {
1188assert(!ManagedVar->isDeclaration());
1199voidCGNVCUDARuntime::createOffloadingEntries() {
1201StringRef Section = (SectionPrefix +
"_offloading_entries").toStringRef(Out);
1203? llvm::object::OffloadKind::OFK_HIP
1204: llvm::object::OffloadKind::OFK_Cuda;
1207 for(KernelInfo &I : EmittedKernels)
1208llvm::offloading::emitOffloadingEntry(
1209M, Kind, KernelHandles[I.Kernel->getName()],
1210getDeviceSideName(cast<NamedDecl>(I.D)),
0,
0,
1211llvm::offloading::OffloadGlobalEntry, Section);
1213 for(VarInfo &I : DeviceVars) {
1215CGM.
getDataLayout().getTypeAllocSize(I.Var->getValueType());
1218?
static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern)
1220(I.Flags.isConstant()
1221?
static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant)
1223(I.Flags.isNormalized()
1224?
static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
1226 if(I.Flags.getKind() == DeviceVarFlags::Variable) {
1227 if(I.Flags.isManaged()) {
1228assert(I.Var->getName().ends_with(
".managed") &&
1229 "HIP managed variables not transformed");
1231 auto*ManagedVar = M.getNamedGlobal(
1232I.Var->getName().drop_back(StringRef(
".managed").size()));
1233llvm::offloading::emitOffloadingEntry(
1234M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1235llvm::offloading::OffloadGlobalManagedEntry | Flags,
1236I.Var->getAlignment(), Section, ManagedVar);
1238llvm::offloading::emitOffloadingEntry(
1239M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1240llvm::offloading::OffloadGlobalEntry | Flags,
1243}
else if(I.Flags.getKind() == DeviceVarFlags::Surface) {
1244llvm::offloading::emitOffloadingEntry(
1245M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1246llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
1247I.Flags.getSurfTexType(), Section);
1248}
else if(I.Flags.getKind() == DeviceVarFlags::Texture) {
1249llvm::offloading::emitOffloadingEntry(
1250M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
1251llvm::offloading::OffloadGlobalTextureEntry | Flags,
1252I.Flags.getSurfTexType(), Section);
1258llvm::Function *CGNVCUDARuntime::finalizeModule() {
1259transformManagedVars();
1271 for(
auto&&Info : DeviceVars) {
1272 auto Kind= Info.Flags.getKind();
1273 if(!Info.Var->isDeclaration() &&
1274!llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
1275(Kind == DeviceVarFlags::Variable ||
1276Kind == DeviceVarFlags::Surface ||
1277Kind == DeviceVarFlags::Texture) &&
1278Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
1285(CGM.
getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
1286createOffloadingEntries();
1288 returnmakeModuleCtorFunction();
1293llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
1295 auto Loc= KernelHandles.find(F->getName());
1296 if(
Loc!= KernelHandles.end()) {
1297 autoOldHandle =
Loc->second;
1298 if(KernelStubs[OldHandle] == F)
1306KernelStubs[OldHandle] = F;
1311KernelStubs.erase(OldHandle);
1315KernelHandles[F->getName()] = F;
1320 auto*Var =
newllvm::GlobalVariable(
1321TheModule, F->getType(),
true, F->getLinkage(),
1326Var->setDSOLocal(F->isDSOLocal());
1327Var->setVisibility(F->getVisibility());
1328 auto*FD = cast<FunctionDecl>(GD.
getDecl());
1329 auto*FT = FD->getPrimaryTemplate();
1330 if(!FT || FT->isThisDeclarationADefinition())
1332KernelHandles[F->getName()] = Var;
1333KernelStubs[Var] = F;
static std::unique_ptr< MangleContext > InitDeviceMC(CodeGenModule &CGM)
static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar)
TranslationUnitDecl * getTranslationUnitDecl() const
MangleContext * createMangleContext(const TargetInfo *T=nullptr)
If T is null pointer, assume the target in ASTContext.
bool shouldExternalize(const Decl *D) const
Whether a C++ static variable or CUDA/HIP kernel should be externalized.
StringRef getCUIDHash() const
const TargetInfo * getAuxTargetInfo() const
llvm::DenseSet< const VarDecl * > CUDADeviceVarODRUsedByHost
Keep track of CUDA/HIP device-side variables ODR-used by host code.
MangleContext * createDeviceMangleContext(const TargetInfo &T)
Creates a device mangle context to correctly mangle lambdas in a mixed architecture compile by settin...
TypeInfoChars getTypeInfoInChars(const Type *T) const
const TargetInfo & getTargetInfo() const
unsigned getTargetAddressSpace(LangAS AS) const
CharUnits - This is an opaque type for sizes expressed in character units.
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
static CharUnits One()
One - Construct a CharUnits quantity of one.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
std::string CudaGpuBinaryFileName
Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
llvm::PointerType * getType() const
Return the type of the pointer value.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args)=0
Emits a kernel launch stub.
virtual llvm::Function * getKernelStub(llvm::GlobalValue *Handle)=0
Get kernel stub by kernel handle.
virtual void handleVarRegistration(const VarDecl *VD, llvm::GlobalVariable &Var)=0
Check whether a variable is a device variable and register it if true.
virtual llvm::Function * finalizeModule()=0
Finalize generated LLVM module.
virtual llvm::GlobalValue * getKernelHandle(llvm::Function *Stub, GlobalDecl GD)=0
Get kernel handle by stub function.
virtual void internalizeDeviceSideVar(const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage)=0
Adjust linkage of shadow variables in host compilation.
MangleContext & getMangleContext()
Gets the mangle context.
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
CGFunctionInfo - Class to encapsulate the information about a function definition.
CallArgList - Type for representing both the value and type of arguments in a call.
void add(RValue rvalue, QualType type)
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
llvm::Type * ConvertTypeForMem(QualType T)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
This class organizes the cross-function state that is used while generating LLVM code.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CodeGenTypes & getTypes()
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
const llvm::Triple & getTriple() const
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO)
void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const
Print the postfix for externalized static variable or kernels for single source offloading languages ...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeFunctionDeclaration(const FunctionDecl *FD)
Free functions are functions that are compatible with an ordinary C function pointer type.
The standard implementation of ConstantInitBuilder used in Clang.
FunctionArgList - Type for representing both the decl and type of parameters to a function.
static RValue get(llvm::Value *V)
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
Decl - This represents one declaration (or definition), e.g.
SourceLocation getLocation() const
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Represents a function declaration or definition.
const ParmVarDecl * getParamDecl(unsigned i) const
GlobalDecl - represents a global declaration.
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind)
const Decl * getDecl() const
One of these records is kept for each identifier that is lexed.
StringRef getName() const
Return the actual identifier string.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
std::string CUID
The user provided compilation unit ID, if non-empty.
GPUDefaultStreamKind GPUDefaultStream
The default stream kind used for HIP kernel launching.
MangleContext - Context for tracking state which persists across multiple calls to the C++ name mangl...
bool shouldMangleDeclName(const NamedDecl *D)
void mangleName(GlobalDecl GD, raw_ostream &)
This represents a decl that may have a name.
IdentifierInfo * getIdentifier() const
Get the identifier that names this declaration, if there is one.
Represents a parameter to a function.
A (possibly-)qualified type.
QualType getCanonicalType() const
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
RecordDecl * getDecl() const
bool isMicrosoft() const
Is this ABI an MSVC-compatible ABI?
bool isItaniumFamily() const
Does this ABI generally fall into the Itanium family of ABIs?
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
const llvm::VersionTuple & getSDKVersion() const
A template argument list.
unsigned size() const
Retrieve the number of template arguments in this template argument list.
The top declaration context.
static DeclContext * castToDeclContext(const TranslationUnitDecl *D)
The base class of the type hierarchy.
Represents a variable declaration or definition.
CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)
Creates an instance of a CUDA runtime class.
bool Zero(InterpState &S, CodePtr OpPC)
The JSON file list parser is used to communicate input to InstallAPI.
CudaVersion ToCudaVersion(llvm::VersionTuple)
bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::IntegerType * SizeTy
llvm::IntegerType * IntTy
int
CharUnits getSizeAlign() const
llvm::PointerType * UnqualPtrTy
CharUnits getPointerAlign() const
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4