
clang: lib/CodeGen/CGCUDANV.cpp Source File

23#include "llvm/ADT/StringRef.h" 24#include "llvm/Frontend/Offloading/Utility.h" 25#include "llvm/IR/BasicBlock.h" 26#include "llvm/IR/Constants.h" 27#include "llvm/IR/DerivedTypes.h" 28#include "llvm/IR/ReplaceConstant.h" 29#include "llvm/Support/Format.h" 30#include "llvm/Support/VirtualFileSystem.h" 32using namespace clang

;

33using namespace

CodeGen;

36constexpr unsigned

CudaFatMagic = 0x466243b1;

37constexpr unsigned

HIPFatMagic = 0x48495046;

StringRef SectionPrefix;
// ...
llvm::IntegerType *IntTy, *SizeTy;
// ...
llvm::PointerType *PtrTy;
// ...
llvm::LLVMContext &Context;
// ...
llvm::Module &TheModule;
// ...
llvm::DenseMap<StringRef, llvm::GlobalValue *> KernelHandles;
// ...
llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
// ...
llvm::GlobalVariable *Var;
// ...
llvm::GlobalVariable *GpuBinaryHandle = nullptr;
// ...
bool RelocatableDeviceCode;
// ...
std::unique_ptr<MangleContext> DeviceMC;

llvm::FunctionCallee getSetupArgumentFn() const;
llvm::FunctionCallee getLaunchFn() const;

llvm::FunctionType *getRegisterGlobalsFnTy() const;
llvm::FunctionType *getCallbackFnTy() const;
llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
std::string addPrefixToName(StringRef FuncName) const;
std::string addUnderscoredPrefixToName(StringRef FuncName) const;

llvm::Function *makeRegisterGlobalsFn();

llvm::Constant *makeConstantString(const std::string &Str,
                                   const std::string &Name = "") {
  return CGM.GetAddrOfConstantCString(Str, Name.c_str()).getPointer();
}

llvm::Constant *makeConstantArray(StringRef Str,
                                  StringRef Name = "",
                                  StringRef SectionName = "",
                                  unsigned Alignment = 0,
                                  bool AddNull = false) {
  llvm::Constant *Value =
      llvm::ConstantDataArray::getString(Context, Str, AddNull);
  auto *GV = new llvm::GlobalVariable(
      // ...
      llvm::GlobalValue::PrivateLinkage, Value, Name);
  if (!SectionName.empty()) {
    GV->setSection(SectionName);
    // ...
    GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
  }
  // ...
  GV->setAlignment(llvm::Align(Alignment));
  // ...
}

llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
  assert(FnTy->getReturnType()->isVoidTy() &&
         "Can only generate dummy functions returning void!");
  llvm::Function *DummyFunc = llvm::Function::Create(
      FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
  // ...
  llvm::BasicBlock *DummyBlock =
      llvm::BasicBlock::Create(Context, "", DummyFunc);
  // ...
  FuncBuilder.SetInsertPoint(DummyBlock);
  FuncBuilder.CreateRetVoid();
  // ...
}

void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
                       bool Extern, bool Constant) {
  DeviceVars.push_back({&Var,
                        // ...
                        VD->hasAttr<HIPManagedAttr>(),
                        // ...
}

void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
                        bool Extern, int Type) {
  DeviceVars.push_back({&Var,
                        // ...
}

void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
                       bool Extern, int Type, bool Normalized) {
  DeviceVars.push_back({&Var,
                        // ...
                        false, Normalized, Type}});
}

llvm::Function *makeModuleCtorFunction();
// ...
llvm::Function *makeModuleDtorFunction();
// ...
void transformManagedVars();
// ...
void createOffloadingEntries();

// ...
llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
  auto Loc = KernelStubs.find(Handle);
  assert(Loc != KernelStubs.end());
  // ...
}

// ...
void handleVarRegistration(const VarDecl *VD,
                           llvm::GlobalVariable &Var) override;
// ...
void internalizeDeviceSideVar(const VarDecl *D,
                              llvm::GlobalValue::LinkageTypes &Linkage) override;

std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
  return (Prefix + FuncName).str();
}

std::string
CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
  return ("__" + Prefix + FuncName).str();
}
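For reference, these two helpers just prepend the runtime prefix picked in the constructor below ("cuda" or "hip"), so a single code path names either runtime's entry points. A minimal standalone sketch of that behaviour (the free functions here are hypothetical stand-ins, not part of this file):

#include <cassert>
#include <string>

static std::string addPrefixToName(const std::string &Prefix,
                                   const std::string &FuncName) {
  return Prefix + FuncName;              // e.g. "hipLaunchKernel"
}

static std::string addUnderscoredPrefixToName(const std::string &Prefix,
                                              const std::string &FuncName) {
  return "__" + Prefix + FuncName;       // e.g. "__hipRegisterFatBinary"
}

int main() {
  assert(addPrefixToName("cuda", "SetupArgument") == "cudaSetupArgument");
  assert(addUnderscoredPrefixToName("cuda", "RegisterFunction") ==
         "__cudaRegisterFunction");
  return 0;
}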

// static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM), fragment:
  return std::unique_ptr<MangleContext>(
      // ...

// CGNVCUDARuntime constructor, fragments:
      TheModule(CGM.getModule()),
      RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
      // ...
  // ...
    SectionPrefix = "omp";
  // ...
    SectionPrefix = Prefix = "hip";
  // ...
    SectionPrefix = Prefix = "cuda";

llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
  // ...
  llvm::Type *Params[] = {PtrTy, SizeTy, SizeTy};
  // ...
      llvm::FunctionType::get(IntTy, Params, false),
      addPrefixToName("SetupArgument"));
}

llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
  // ...
      llvm::FunctionType::get(IntTy, PtrTy, false), "hipLaunchByPtr");
  // ...
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
  return llvm::FunctionType::get(VoidTy, PtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
  return llvm::FunctionType::get(VoidTy, PtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
  llvm::Type *Params[] = {llvm::PointerType::getUnqual(Context), PtrTy, PtrTy,
                          llvm::PointerType::getUnqual(Context)};
  return llvm::FunctionType::get(VoidTy, Params, false);
}

std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
  // ...
  if (auto *FD = dyn_cast<FunctionDecl>(ND))
    GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
  // ...
  std::string DeviceSideName;
  // ...
    llvm::raw_svector_ostream Out(Buffer);
    // ...
    DeviceSideName = std::string(Out.str());
  // ...
    llvm::raw_svector_ostream Out(Buffer);
    Out << DeviceSideName;
    // ...
    DeviceSideName = std::string(Out.str());
  // ...
  return DeviceSideName;
}

// CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args),
// fragments:
  // ...
          dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn->getName()])) {
    GV->setLinkage(CGF.CurFn->getLinkage());
    GV->setInitializer(CGF.CurFn);
  }
  // ...
          CudaFeature::CUDA_USES_NEW_LAUNCH) ||
      // ...
    emitDeviceStubBodyNew(CGF, Args);
  // ...
    emitDeviceStubBodyLegacy(CGF, Args);

// prepareKernelArgsLLVMOffload(CodeGenFunction &CGF, FunctionArgList &Args),
// fragments:
  for (auto &Arg : Args)
    // ...
  llvm::StructType *KernelArgsTy = llvm::StructType::create(ArgTypes);
  // ...
  auto *Int64Ty = CGF.Builder.getInt64Ty();
  KernelLaunchParamsTypes.push_back(Int64Ty);
  KernelLaunchParamsTypes.push_back(PtrTy);
  KernelLaunchParamsTypes.push_back(PtrTy);
  // ...
  llvm::StructType *KernelLaunchParamsTy =
      llvm::StructType::create(KernelLaunchParamsTypes);
  // ...
      "kernel_launch_params");
  // ...
  auto KernelArgsSize = CGM.getDataLayout().getTypeAllocSize(KernelArgsTy);
  // ...
  for (unsigned i = 0; i < Args.size(); ++i) {
    // ...
  }
  // ...
  return KernelLaunchParams;

// prepareKernelArgs(CodeGenFunction &CGF, FunctionArgList &Args), fragments:
      llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
  // ...
  for (unsigned i = 0; i < Args.size(); ++i) {
    // ...
    llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, PtrTy);
    // ...
        VoidVarPtr, CGF.Builder.CreateConstGEP1_32(
    // ...
  }

// emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args), fragments:
  // ...
          ? prepareKernelArgsLLVMOffload(CGF, Args)
          : prepareKernelArgs(CGF, Args);
  // ...
  std::string KernelLaunchAPI = "LaunchKernel";
  // ...
      LangOptions::GPUDefaultStreamKind::PerThread) {
    // ...
      KernelLaunchAPI = KernelLaunchAPI + "_spt";
    // ...
      KernelLaunchAPI = KernelLaunchAPI + "_ptsz";
  }
  auto LaunchKernelName = addPrefixToName(KernelLaunchAPI);
  // ...
  for (auto *Result : DC->lookup(&cudaLaunchKernelII)) {
    // ...
      cudaLaunchKernelFD = FD;
  }
  if (cudaLaunchKernelFD == nullptr) {
    // ...
              "Can't find declaration for " + LaunchKernelName);
  }
  // ...
      llvm::FunctionType::get(IntTy,
      // ...
      addUnderscoredPrefixToName("PopCallConfiguration"));
  // ...
      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
  // ...
  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
  // ...
  llvm::FunctionCallee cudaLaunchKernelFn =
      // ...

  // ...
  llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel);
  std::string GlobalVarName = (KernelFunction->getName() + ".id").str();
  // ...
  llvm::GlobalVariable *HandleVar =
      CGM.getModule().getNamedGlobal(GlobalVarName);
  // ...
    HandleVar = new llvm::GlobalVariable(
        // ...
        false, KernelFunction->getLinkage(),
        llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName);
    HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
    HandleVar->setVisibility(KernelFunction->getVisibility());
    if (KernelFunction->hasComdat())
      HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName));
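Roughly, the stub emitted here pops the launch configuration recorded by the `<<<...>>>` call and forwards it, together with an array of pointers to the kernel arguments, to the runtime's launch entry point. A hedged C++ sketch of what the generated IR corresponds to for a hypothetical kernel `kern(int, float *)` (the `dim3_` type, `kern_stub`, and `kern_handle` names are illustrative stand-ins; the real code is emitted directly as LLVM IR):

#include <cstddef>

struct dim3_ { unsigned x, y, z; };   // stand-in for the runtime's dim3

extern "C" int __cudaPopCallConfiguration(dim3_ *GridDim, dim3_ *BlockDim,
                                          std::size_t *SharedMem, void *Stream);
extern "C" int cudaLaunchKernel(const void *Func, dim3_ GridDim, dim3_ BlockDim,
                                void **Args, std::size_t SharedMem, void *Stream);

extern char kern_handle;   // the kernel handle / stub address

void kern_stub(int x, float *p) {
  void *Args[] = {&x, &p};             // one slot per kernel parameter
  dim3_ GridDim, BlockDim;
  std::size_t SharedMem;
  void *Stream;
  __cudaPopCallConfiguration(&GridDim, &BlockDim, &SharedMem, &Stream);
  cudaLaunchKernel(&kern_handle, GridDim, BlockDim, Args, SharedMem, Stream);
}

On the HIP path the same shape is emitted against the "hip"-prefixed entry points, optionally with the "_spt"/"_ptsz" per-thread-stream suffixes selected above.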

// emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args),
// fragments:
  llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
  // ...
  for (const VarDecl *A : Args) {
    // ...
    Offset = Offset.alignTo(TInfo.Align);
    llvm::Value *Args[] = {
        CGF.Builder.CreatePointerCast(
            // ...
        llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
        llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
        // ...
    llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
    llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero);
    // ...
    CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
    // ...
    Offset += TInfo.Width;
  }
  // ...
  llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
  // ...
      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn->getName()], PtrTy);
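The legacy stub instead pushes each argument with a setup call and then launches by pointer. A hedged C++ sketch of the generated shape for the same hypothetical `kern(int, float *)` (names are illustrative; on HIP the launch callee is "hipLaunchByPtr" as returned by getLaunchFn above):

#include <cstddef>

extern "C" int cudaSetupArgument(const void *Arg, std::size_t Size,
                                 std::size_t Offset);
extern "C" int cudaLaunch(const void *Func);

extern char kern_handle;   // the kernel handle / stub address

void kern_stub_legacy(int x, float *p) {
  std::size_t Offset = 0;
  // One setup call per parameter at its aligned offset; the emitted IR branches
  // straight to the exit block as soon as a setup call reports failure.
  if (cudaSetupArgument(&x, sizeof(x), Offset) != 0) return;
  Offset += sizeof(x);
  Offset = (Offset + alignof(float *) - 1) & ~(alignof(float *) - 1);
  if (cudaSetupArgument(&p, sizeof(p), Offset) != 0) return;
  cudaLaunch(&kern_handle);
}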

static void replaceManagedVar(llvm::GlobalVariable *Var,
                              llvm::GlobalVariable *ManagedVar) {
  // ...
  for (auto &&VarUse : Var->uses()) {
    WorkList.push_back({VarUse.getUser()});
  }
  while (!WorkList.empty()) {
    auto &&WorkItem = WorkList.pop_back_val();
    auto *U = WorkItem.back();
    if (isa<llvm::ConstantExpr>(U)) {
      for (auto &&UU : U->uses()) {
        WorkItem.push_back(UU.getUser());
        WorkList.push_back(WorkItem);
        // ...
      }
      // ...
    }
    if (auto *I = dyn_cast<llvm::Instruction>(U)) {
      llvm::Value *OldV = Var;
      llvm::Instruction *NewV = new llvm::LoadInst(
          Var->getType(), ManagedVar, "ld.managed", false,
          llvm::Align(Var->getAlignment()), I->getIterator());
      // ...
      for (auto &&Op : WorkItem) {
        auto *CE = cast<llvm::ConstantExpr>(Op);
        auto *NewInst = CE->getAsInstruction();
        NewInst->insertBefore(*I->getParent(), I->getIterator());
        NewInst->replaceUsesOfWith(OldV, NewV);
        // ...
      }
      I->replaceUsesOfWith(OldV, NewV);
    }
    // ...
      llvm_unreachable("Invalid use of managed variable");
  }
}
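In effect, every use of the real storage is redirected through a freshly inserted "ld.managed" load of the runtime-managed pointer. A C++-level analogy of that rewrite (the names `x`, `x_managed`, and `read_x` are hypothetical, see transformManagedVars below for how the two globals are created):

extern float *x;            // the managed shadow pointer the runtime fills in
// extern float x_managed;  // the original definition, renamed to "x.managed"

float read_x() {
  float *ld_managed = x;    // load inserted in front of the rewritten use
  return *ld_managed;       // was: return x_managed;
}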

llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
  // ...
  if (EmittedKernels.empty() && DeviceVars.empty())
    // ...
  llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
      getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_register_globals"), &TheModule);
  llvm::BasicBlock *EntryBB =
      llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
  // ...
  Builder.SetInsertPoint(EntryBB);

  // ...
  llvm::Type *RegisterFuncParams[] = {
      PtrTy, PtrTy, PtrTy, PtrTy, IntTy,
      PtrTy, PtrTy, PtrTy, PtrTy, llvm::PointerType::getUnqual(Context)};
  // ...
      llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
      addUnderscoredPrefixToName("RegisterFunction"));
  // ...
  llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
  for (auto &&I : EmittedKernels) {
    llvm::Constant *KernelName =
        makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
    llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(PtrTy);
    llvm::Value *Args[] = {
        // ...
        KernelHandles[I.Kernel->getName()],
        // ...
        llvm::ConstantInt::get(IntTy, -1),
        // ...
        llvm::ConstantPointerNull::get(llvm::PointerType::getUnqual(Context))};
    Builder.CreateCall(RegisterFunc, Args);
  }

  // ...
  llvm::Type *VarSizeTy = IntTy;
  // ...
  llvm::Type *RegisterVarParams[] = {PtrTy, PtrTy, PtrTy, PtrTy,
                                     IntTy, VarSizeTy, IntTy, IntTy};
  // ...
      llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
      addUnderscoredPrefixToName("RegisterVar"));
  // ...
  llvm::Type *RegisterManagedVarParams[] = {PtrTy, PtrTy, PtrTy,
                                            PtrTy, VarSizeTy, IntTy};
  // ...
      llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
      addUnderscoredPrefixToName("RegisterManagedVar"));
  // ...
      llvm::FunctionType::get(
          VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy}, false),
      addUnderscoredPrefixToName("RegisterSurface"));
  // ...
      llvm::FunctionType::get(
          VoidTy, {PtrTy, PtrTy, PtrTy, PtrTy, IntTy, IntTy, IntTy}, false),
      addUnderscoredPrefixToName("RegisterTexture"));
  for (auto &&Info : DeviceVars) {
    llvm::GlobalVariable *Var = Info.Var;
    assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
           "External variables should not show up here, except HIP managed "
           // ...
    llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
    switch (Info.Flags.getKind()) {
    case DeviceVarFlags::Variable: {
      // ...
      if (Info.Flags.isManaged()) {
        assert(Var->getName().ends_with(".managed") &&
               "HIP managed variables not transformed");
        auto *ManagedVar = CGM.getModule().getNamedGlobal(
            Var->getName().drop_back(StringRef(".managed").size()));
        llvm::Value *Args[] = {
            // ...
            llvm::ConstantInt::get(VarSizeTy, VarSize),
            llvm::ConstantInt::get(IntTy, Var->getAlignment())};
        if (!Var->isDeclaration())
          Builder.CreateCall(RegisterManagedVar, Args);
      // ...
        llvm::Value *Args[] = {
            // ...
            llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
            llvm::ConstantInt::get(VarSizeTy, VarSize),
            llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
            llvm::ConstantInt::get(IntTy, 0)};
        Builder.CreateCall(RegisterVar, Args);
      }
      // ...
    case DeviceVarFlags::Surface:
      // ...
          {&GpuBinaryHandlePtr, Var, VarName, VarName,
           llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
      // ...
    case DeviceVarFlags::Texture:
      // ...
          {&GpuBinaryHandlePtr, Var, VarName, VarName,
           llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
      // ...
    }
  }

  Builder.CreateRetVoid();
  return RegisterKernelsFunc;
}
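A hedged C++ picture of the "__{cuda,hip}_register_globals" function built here, for one kernel and one device variable (the prototypes are simplified to mirror the callee types constructed above, and `kern`, `kern_handle`, and `dev_var` are hypothetical names; the real runtime entry points take more precisely typed arguments):

#include <cstddef>

extern "C" int __cudaRegisterFunction(void **, const void *, const char *,
                                      const char *, int, void *, void *,
                                      void *, void *, void *);
extern "C" void __cudaRegisterVar(void **, void *, const char *, const char *,
                                  int, std::size_t, int, int);

extern char kern_handle;   // handle for a kernel "kern"
extern float dev_var;      // host shadow of a __device__ variable "dev_var"

void cuda_register_globals(void **FatbinHandle) {   // "__cuda_register_globals"
  __cudaRegisterFunction(FatbinHandle, &kern_handle, "kern", "kern",
                         /*ThreadLimit=*/-1, nullptr, nullptr, nullptr,
                         nullptr, nullptr);
  __cudaRegisterVar(FatbinHandle, &dev_var, "dev_var", "dev_var",
                    /*Extern=*/0, sizeof(dev_var), /*Constant=*/0, /*Global=*/0);
}

Surfaces, textures, and HIP managed variables get analogous calls to the "RegisterSurface", "RegisterTexture", and "RegisterManagedVar" entry points declared above.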

llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
  // ...
  if (CudaGpuBinaryFileName.empty() && !IsHIP)
    // ...
  if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
      // ...
  llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
  // ...
  if (RelocatableDeviceCode && !RegisterGlobalsFunc)
    RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
  // ...
      llvm::FunctionType::get(PtrTy, PtrTy, false),
      addUnderscoredPrefixToName("RegisterFatBinary"));
  // ...
  llvm::StructType *FatbinWrapperTy =
      llvm::StructType::get(IntTy, IntTy, PtrTy, PtrTy);

  // ...
  std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
  if (!CudaGpuBinaryFileName.empty()) {
    // ...
    auto CudaGpuBinaryOrErr =
        VFS->getBufferForFile(CudaGpuBinaryFileName, -1, false);
    if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
      // ...
          << CudaGpuBinaryFileName << EC.message();
      // ...
    }
    CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
  }

  // ...
  llvm::Function *ModuleCtorFunc = llvm::Function::Create(
      llvm::FunctionType::get(VoidTy, false),
      llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_module_ctor"), &TheModule);
  llvm::BasicBlock *CtorEntryBB =
      llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
  // ...
  CtorBuilder.SetInsertPoint(CtorEntryBB);

  const char *FatbinConstantName;
  const char *FatbinSectionName;
  const char *ModuleIDSectionName;
  StringRef ModuleIDPrefix;
  llvm::Constant *FatBinStr;
  // ...
    FatbinConstantName = ".hip_fatbin";
    FatbinSectionName = ".hipFatBinSegment";
    // ...
    ModuleIDSectionName = "__hip_module_id";
    ModuleIDPrefix = "__hip_";
    // ...
      const unsigned HIPCodeObjectAlign = 4096;
      FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
                                    FatbinConstantName, HIPCodeObjectAlign);
    // ...
      FatBinStr = new llvm::GlobalVariable(
          // ...
          true, llvm::GlobalValue::ExternalLinkage, nullptr,
          // ...
          nullptr, llvm::GlobalVariable::NotThreadLocal);
      cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
    // ...
    FatMagic = HIPFatMagic;
  // ...
    if (RelocatableDeviceCode)
      FatbinConstantName = CGM.getTriple().isMacOSX()
                               ? "__NV_CUDA,__nv_relfatbin"
                               // ...
          CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
    // ...
    FatbinSectionName =
        CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
    // ...
    ModuleIDSectionName = CGM.getTriple().isMacOSX()
                              ? "__NV_CUDA,__nv_module_id"
                              // ...
    ModuleIDPrefix = "__nv_";
    // ...
    FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
                                  FatbinConstantName, 8);
    FatMagic = CudaFatMagic;
  // ...

  // ...
  auto Values = Builder.beginStruct(FatbinWrapperTy);
  // ...
  Values.addInt(IntTy, FatMagic);
  // ...
  Values.addInt(IntTy, 1);
  // ...
  Values.add(FatBinStr);
  // ...
  Values.add(llvm::ConstantPointerNull::get(PtrTy));
  llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
      addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
      // ...
  FatbinWrapper->setSection(FatbinSectionName);

  // ...
    auto Linkage = RelocatableDeviceCode ? llvm::GlobalValue::ExternalLinkage
                                         : llvm::GlobalValue::InternalLinkage;
    llvm::BasicBlock *IfBlock =
        llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
    llvm::BasicBlock *ExitBlock =
        llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
    // ...
    GpuBinaryHandle = new llvm::GlobalVariable(
        TheModule, PtrTy, false, Linkage,
        // ...
        !RelocatableDeviceCode ? llvm::ConstantPointerNull::get(PtrTy)
        // ...
    if (Linkage != llvm::GlobalValue::InternalLinkage)
      GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
    // ...
        GpuBinaryHandle, PtrTy,
    // ...
    auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
    llvm::Constant *Zero =
        llvm::Constant::getNullValue(HandleValue->getType());
    llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
    CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
    // ...
    CtorBuilder.SetInsertPoint(IfBlock);
    // ...
    llvm::CallInst *RegisterFatbinCall =
        CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
    CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
    CtorBuilder.CreateBr(ExitBlock);
    // ...
    CtorBuilder.SetInsertPoint(ExitBlock);
    // ...
    if (RegisterGlobalsFunc) {
      auto *HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
      CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
    }
    // ...
  } else if (!RelocatableDeviceCode) {
    // ...
    llvm::CallInst *RegisterFatbinCall =
        CtorBuilder.CreateCall(RegisterFatbinFunc, FatbinWrapper);
    GpuBinaryHandle = new llvm::GlobalVariable(
        TheModule, PtrTy, false, llvm::GlobalValue::InternalLinkage,
        llvm::ConstantPointerNull::get(PtrTy), "__cuda_gpubin_handle");
    // ...
    CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
                                   // ...
    if (RegisterGlobalsFunc)
      CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);

    // ...
        CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
      // ...
          llvm::FunctionType::get(VoidTy, PtrTy, false),
          "__cudaRegisterFatBinaryEnd");
      CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
    }
  }
  // ...
    llvm::raw_svector_ostream OS(ModuleID);
    OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
    llvm::Constant *ModuleIDConstant = makeConstantArray(
        std::string(ModuleID), "", ModuleIDSectionName, 32, true);

    // ...
    llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
                              Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
    // ...
    RegisterLinkedBinaryName += ModuleID;
    // ...
        getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);

    assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
    llvm::Value *Args[] = {RegisterGlobalsFunc, FatbinWrapper, ModuleIDConstant,
                           makeDummyFunction(getCallbackFnTy())};
    CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
  // ...

  // ...
  if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
    // ...
    llvm::FunctionType *AtExitTy =
        llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
    llvm::FunctionCallee AtExitFunc =
        // ...
    CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
  }

  CtorBuilder.CreateRetVoid();
  return ModuleCtorFunc;
}
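A hedged C++ picture of the constructor emitted here on the non-relocatable CUDA path (illustrative only; the HIP path additionally guards on a possibly shared handle so the binary is registered only once, and the relocatable path goes through the "RegisterLinkedBinary" callback instead):

#include <cstdint>

extern "C" void **__cudaRegisterFatBinary(void *FatbinWrapper);
extern "C" void __cudaRegisterFatBinaryEnd(void **FatbinHandle);

// Mirrors FatbinWrapperTy = { i32, i32, ptr, ptr } built above.
struct FatbinWrapper {
  std::uint32_t Magic;    // CudaFatMagic (0x466243b1) or HIPFatMagic (0x48495046)
  std::uint32_t Version;  // always 1 here
  const void *Binary;     // the embedded GPU binary (.nv_fatbin / .hip_fatbin)
  const void *Unused;
};

extern FatbinWrapper fatbin_wrapper;    // "__cuda_fatbin_wrapper"
void **gpubin_handle;                   // "__cuda_gpubin_handle"
void cuda_register_globals(void **);    // sketched after makeRegisterGlobalsFn above

void cuda_module_ctor() {               // "__cuda_module_ctor"
  gpubin_handle = __cudaRegisterFatBinary(&fatbin_wrapper);
  cuda_register_globals(gpubin_handle);
  __cudaRegisterFatBinaryEnd(gpubin_handle);  // only when the CUDA version needs it
  // the ctor also registers the module dtor (next function) via atexit
}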

llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
  // ...
  if (!GpuBinaryHandle)
    // ...
      llvm::FunctionType::get(VoidTy, PtrTy, false),
      addUnderscoredPrefixToName("UnregisterFatBinary"));
  // ...
  llvm::Function *ModuleDtorFunc = llvm::Function::Create(
      llvm::FunctionType::get(VoidTy, false),
      llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_module_dtor"), &TheModule);
  // ...
  llvm::BasicBlock *DtorEntryBB =
      llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
  // ...
  DtorBuilder.SetInsertPoint(DtorEntryBB);
  // ...
      GpuBinaryHandle, GpuBinaryHandle->getValueType(),
  // ...
  auto *HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
  // ...
    llvm::BasicBlock *IfBlock =
        llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
    llvm::BasicBlock *ExitBlock =
        llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
    llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
    llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
    DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
    // ...
    DtorBuilder.SetInsertPoint(IfBlock);
    DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
    DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
    DtorBuilder.CreateBr(ExitBlock);
    // ...
    DtorBuilder.SetInsertPoint(ExitBlock);
  // ...
    DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
  // ...
  DtorBuilder.CreateRetVoid();
  return ModuleDtorFunc;
}

CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
  return new CGNVCUDARuntime(CGM);
}
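For symmetry with the constructor sketch above, a hedged C++ picture of the "__cuda_module_dtor" on the plain CUDA path (illustrative; the HIP branch shown in the IR loads the handle, unregisters only if it is non-null, and then stores null back so the binary is unregistered once):

extern "C" void __cudaUnregisterFatBinary(void **FatbinHandle);

extern void **gpubin_handle;   // "__cuda_gpubin_handle", set by the module ctor

void cuda_module_dtor() {      // "__cuda_module_dtor", registered via atexit
  __cudaUnregisterFatBinary(gpubin_handle);
}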

void CGNVCUDARuntime::internalizeDeviceSideVar(
    const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
  // ...
  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
      // ...
      D->getType()->isCUDADeviceBuiltinSurfaceType() ||
      D->getType()->isCUDADeviceBuiltinTextureType()) {
    Linkage = llvm::GlobalValue::InternalLinkage;
  }
}

void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
                                            llvm::GlobalVariable &GV) {
  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
    // ...
    if ((!D->hasExternalStorage() && !D->isInline()) ||
        // ...
      registerDeviceVar(D, GV, !D->hasDefinition(),
                        D->hasAttr<CUDAConstantAttr>());
    // ...
  } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
             D->getType()->isCUDADeviceBuiltinTextureType()) {
    // ...
    const auto *TD = cast<ClassTemplateSpecializationDecl>(
        // ...
    if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
      assert(Args.size() == 2 &&
             "Unexpected number of template arguments of CUDA device "
             "builtin surface type.");
      auto SurfType = Args[1].getAsIntegral();
      if (!D->hasExternalStorage())
        registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
    // ...
      assert(Args.size() == 3 &&
             "Unexpected number of template arguments of CUDA device "
             "builtin texture type.");
      auto TexType = Args[1].getAsIntegral();
      auto Normalized = Args[2].getAsIntegral();
      if (!D->hasExternalStorage())
        registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
                          Normalized.getZExtValue());
    // ...
  }
}

void CGNVCUDARuntime::transformManagedVars() {
  for (auto &&Info : DeviceVars) {
    llvm::GlobalVariable *Var = Info.Var;
    if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
        Info.Flags.isManaged()) {
      auto *ManagedVar = new llvm::GlobalVariable(
          // ...
          false, Var->getLinkage(),
          Var->isDeclaration()
              // ...
              : llvm::ConstantPointerNull::get(Var->getType()),
          // ...
          llvm::GlobalVariable::NotThreadLocal,
          // ...
              ? LangAS::cuda_device
              : LangAS::Default));
      ManagedVar->setDSOLocal(Var->isDSOLocal());
      ManagedVar->setVisibility(Var->getVisibility());
      ManagedVar->setExternallyInitialized(true);
      // ...
      ManagedVar->takeName(Var);
      Var->setName(Twine(ManagedVar->getName()) + ".managed");
      // ...
      if (CGM.getLangOpts().CUDAIsDevice && !Var->isDeclaration()) {
        assert(!ManagedVar->isDeclaration());
        // ...
      }
    }
  }
}
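Put differently, each managed variable ends up as a pair of globals. An illustrative host-side picture after the pass for a hypothetical `__managed__ float x = 1.0f;` (names approximate):

float x_managed = 1.0f;  // the original definition, renamed to "x.managed",
                         // keeps the initializer
float *x = nullptr;      // new global that takes over the original name; marked
                         // externally-initialized and filled in by the runtime
                         // when the managed variable is registered

The use-rewriting half of this scheme is the replaceManagedVar helper shown earlier.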

void CGNVCUDARuntime::createOffloadingEntries() {
  // ...
  StringRef Section = (SectionPrefix + "_offloading_entries").toStringRef(Out);
  // ...
      ? llvm::object::OffloadKind::OFK_HIP
      : llvm::object::OffloadKind::OFK_Cuda;
  // ...
  for (KernelInfo &I : EmittedKernels)
    llvm::offloading::emitOffloadingEntry(
        M, Kind, KernelHandles[I.Kernel->getName()],
        getDeviceSideName(cast<NamedDecl>(I.D)), 0, 0,
        llvm::offloading::OffloadGlobalEntry, Section);

  for (VarInfo &I : DeviceVars) {
    // ...
        CGM.getDataLayout().getTypeAllocSize(I.Var->getValueType());
    // ...
        ? static_cast<int32_t>(llvm::offloading::OffloadGlobalExtern)
        // ...
        (I.Flags.isConstant()
             ? static_cast<int32_t>(llvm::offloading::OffloadGlobalConstant)
             // ...
        (I.Flags.isNormalized()
             ? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
             // ...
    if (I.Flags.getKind() == DeviceVarFlags::Variable) {
      if (I.Flags.isManaged()) {
        assert(I.Var->getName().ends_with(".managed") &&
               "HIP managed variables not transformed");
        // ...
        auto *ManagedVar = M.getNamedGlobal(
            I.Var->getName().drop_back(StringRef(".managed").size()));
        llvm::offloading::emitOffloadingEntry(
            M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
            llvm::offloading::OffloadGlobalManagedEntry | Flags,
            I.Var->getAlignment(), Section, ManagedVar);
      // ...
        llvm::offloading::emitOffloadingEntry(
            M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
            llvm::offloading::OffloadGlobalEntry | Flags,
            // ...
      }
    } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
      llvm::offloading::emitOffloadingEntry(
          M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
          llvm::offloading::OffloadGlobalSurfaceEntry | Flags,
          I.Flags.getSurfTexType(), Section);
    } else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
      llvm::offloading::emitOffloadingEntry(
          M, Kind, I.Var, getDeviceSideName(I.D), VarSize,
          llvm::offloading::OffloadGlobalTextureEntry | Flags,
          I.Flags.getSurfTexType(), Section);
    }
  }
}
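The section the entries land in is simply derived from the runtime prefix chosen in the constructor; a trivial illustration of that composition (the helper name here is hypothetical):

#include <string>

// e.g. "cuda_offloading_entries", "hip_offloading_entries", or
// "omp_offloading_entries" when the OpenMP-style offload driver is used.
std::string offloadEntriesSection(const std::string &SectionPrefix) {
  return SectionPrefix + "_offloading_entries";
}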

llvm::Function *CGNVCUDARuntime::finalizeModule() {
  // ...
  transformManagedVars();
  // ...
  for (auto &&Info : DeviceVars) {
    auto Kind = Info.Flags.getKind();
    if (!Info.Var->isDeclaration() &&
        !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
        (Kind == DeviceVarFlags::Variable ||
         Kind == DeviceVarFlags::Surface ||
         Kind == DeviceVarFlags::Texture) &&
        Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
      // ...
    }
  }
  // ...
      (CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
    createOffloadingEntries();
  // ...
  return makeModuleCtorFunction();
}

llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
                                                    GlobalDecl GD) {
  // ...
  auto Loc = KernelHandles.find(F->getName());
  if (Loc != KernelHandles.end()) {
    auto OldHandle = Loc->second;
    if (KernelStubs[OldHandle] == F)
      // ...
    KernelStubs[OldHandle] = F;
    // ...
    KernelStubs.erase(OldHandle);
    // ...
  }
  // ...
    KernelHandles[F->getName()] = F;
    // ...
  auto *Var = new llvm::GlobalVariable(
      TheModule, F->getType(), true, F->getLinkage(),
      // ...
  Var->setDSOLocal(F->isDSOLocal());
  Var->setVisibility(F->getVisibility());
  auto *FD = cast<FunctionDecl>(GD.getDecl());
  auto *FT = FD->getPrimaryTemplate();
  if (!FT || FT->isThisDeclarationADefinition())
    // ...
  KernelHandles[F->getName()] = Var;
  KernelStubs[Var] = F;
  // ...
}
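A hedged C++-level picture of the handle indirection this sets up (names are illustrative): host code that takes a kernel's address resolves to the handle, a constant global of the stub's type whose initializer is the stub itself (set in emitDeviceStub above), so the stub can be replaced without changing the value the program observes for the kernel's address.

void kern_stub(int, float *);                          // the emitted host stub
void (*const kern_handle)(int, float *) = kern_stub;   // the handle global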

Referenced declarations (Doxygen cross-reference):

static std::unique_ptr< MangleContext > InitDeviceMC(CodeGenModule &CGM)

static void replaceManagedVar(llvm::GlobalVariable *Var, llvm::GlobalVariable *ManagedVar)

TranslationUnitDecl * getTranslationUnitDecl() const

MangleContext * createMangleContext(const TargetInfo *T=nullptr)

If T is null pointer, assume the target in ASTContext.

bool shouldExternalize(const Decl *D) const

Whether a C++ static variable or CUDA/HIP kernel should be externalized.

StringRef getCUIDHash() const

const TargetInfo * getAuxTargetInfo() const

llvm::DenseSet< const VarDecl * > CUDADeviceVarODRUsedByHost

Keep track of CUDA/HIP device-side variables ODR-used by host code.

MangleContext * createDeviceMangleContext(const TargetInfo &T)

Creates a device mangle context to correctly mangle lambdas in a mixed architecture compile by settin...

TypeInfoChars getTypeInfoInChars(const Type *T) const

const TargetInfo & getTargetInfo() const

unsigned getTargetAddressSpace(LangAS AS) const

CharUnits - This is an opaque type for sizes expressed in character units.

llvm::Align getAsAlign() const

getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...

static CharUnits One()

One - Construct a CharUnits quantity of one.

static CharUnits fromQuantity(QuantityType Quantity)

fromQuantity - Construct a CharUnits quantity from a raw integer type.

static CharUnits Zero()

Zero - Construct a CharUnits quantity of zero.

std::string CudaGpuBinaryFileName

Name of file passed with -fcuda-include-gpubinary option to forward to CUDA runtime back-end for inco...

Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...

llvm::Value * emitRawPointer(CodeGenFunction &CGF) const

Return the pointer contained in this class after authenticating it and adding offset to it if necessa...

llvm::PointerType * getType() const

Return the type of the pointer value.

llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)

llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)

llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)

Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")

llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")

virtual std::string getDeviceSideName(const NamedDecl *ND)=0

Returns function or variable name on device side even if the current compilation is for host.

virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args)=0

Emits a kernel launch stub.

virtual llvm::Function * getKernelStub(llvm::GlobalValue *Handle)=0

Get kernel stub by kernel handle.

virtual void handleVarRegistration(const VarDecl *VD, llvm::GlobalVariable &Var)=0

Check whether a variable is a device variable and register it if true.

virtual llvm::Function * finalizeModule()=0

Finalize generated LLVM module.

virtual llvm::GlobalValue * getKernelHandle(llvm::Function *Stub, GlobalDecl GD)=0

Get kernel handle by stub function.

virtual void internalizeDeviceSideVar(const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage)=0

Adjust linkage of shadow variables in host compilation.

MangleContext & getMangleContext()

Gets the mangle context.

static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())

CGFunctionInfo - Class to encapsulate the information about a function definition.

CallArgList - Type for representing both the value and type of arguments in a call.

void add(RValue rvalue, QualType type)

CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...

RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)

llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)

createBasicBlock - Create an LLVM basic block.

const LangOptions & getLangOpts() const

void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)

EmitBlock - Emit the given block.

llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)

CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...

llvm::Type * ConvertTypeForMem(QualType T)

RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)

CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...

void EmitBranch(llvm::BasicBlock *Block)

EmitBranch - Emit a branch to the specified basic block from the current insert block,...

RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)

EmitCall - Generate a call of the given function, expecting the given result type,...

const Decl * CurFuncDecl

CurFuncDecl - Holds the Decl for the current outermost non-closure context.

llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")

Address GetAddrOfLocalVar(const VarDecl *VD)

GetAddrOfLocalVar - Return the address of a local variable.

This class organizes the cross-function state that is used while generating LLVM code.

llvm::Module & getModule() const

llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)

Create or return a runtime function declaration with the specified type and name.

void addCompilerUsedGlobal(llvm::GlobalValue *GV)

Add a global to a list to be added to the llvm.compiler.used metadata.

const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const

DiagnosticsEngine & getDiags() const

const LangOptions & getLangOpts() const

CodeGenTypes & getTypes()

const TargetInfo & getTarget() const

const llvm::DataLayout & getDataLayout() const

void Error(SourceLocation loc, StringRef error)

Emit a general error that something can't be done.

CGCXXABI & getCXXABI() const

const llvm::Triple & getTriple() const

ASTContext & getContext() const

const CodeGenOptions & getCodeGenOpts() const

StringRef getMangledName(GlobalDecl GD)

void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO)

void printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const

Print the postfix for externalized static variable or kernels for single source offloading languages ...

llvm::Type * ConvertType(QualType T)

ConvertType - Convert type T into a llvm::Type.

const CGFunctionInfo & arrangeFunctionDeclaration(const FunctionDecl *FD)

Free functions are functions that are compatible with an ordinary C function pointer type.

The standard implementation of ConstantInitBuilder used in Clang.

FunctionArgList - Type for representing both the decl and type of parameters to a function.

static RValue get(llvm::Value *V)

static RValue getAggregate(Address addr, bool isVolatile=false)

Convert an Address to an RValue.

ReturnValueSlot - Contains the address where the return value of a function can be stored,...

DeclContext - This is used only as base class of specific decl types that can act as declaration cont...

lookup_result lookup(DeclarationName Name) const

lookup - Find the declarations (if any) with the given Name in this context.

Decl - This represents one declaration (or definition), e.g.

SourceLocation getLocation() const

DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)

Issue the message to the client.

Represents a function declaration or definition.

const ParmVarDecl * getParamDecl(unsigned i) const

GlobalDecl - represents a global declaration.

GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind)

const Decl * getDecl() const

One of these records is kept for each identifier that is lexed.

StringRef getName() const

Return the actual identifier string.

IdentifierInfo & get(StringRef Name)

Return the identifier token info for the specified named identifier.

std::string CUID

The user provided compilation unit ID, if non-empty.

GPUDefaultStreamKind GPUDefaultStream

The default stream kind used for HIP kernel launching.

MangleContext - Context for tracking state which persists across multiple calls to the C++ name mangl...

bool shouldMangleDeclName(const NamedDecl *D)

void mangleName(GlobalDecl GD, raw_ostream &)

This represents a decl that may have a name.

IdentifierInfo * getIdentifier() const

Get the identifier that names this declaration, if there is one.

Represents a parameter to a function.

A (possibly-)qualified type.

QualType getCanonicalType() const

A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...

RecordDecl * getDecl() const

bool isMicrosoft() const

Is this ABI an MSVC-compatible ABI?

bool isItaniumFamily() const

Does this ABI generally fall into the Itanium family of ABIs?

TargetCXXABI getCXXABI() const

Get the C++ ABI currently in use.

const llvm::VersionTuple & getSDKVersion() const

A template argument list.

unsigned size() const

Retrieve the number of template arguments in this template argument list.

The top declaration context.

static DeclContext * castToDeclContext(const TranslationUnitDecl *D)

The base class of the type hierarchy.

Represents a variable declaration or definition.

CGCUDARuntime * CreateNVCUDARuntime(CodeGenModule &CGM)

Creates an instance of a CUDA runtime class.

bool Zero(InterpState &S, CodePtr OpPC)

The JSON file list parser is used to communicate input to InstallAPI.

CudaVersion ToCudaVersion(llvm::VersionTuple)

bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature)

Linkage

Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have.

llvm::IntegerType * Int8Ty

i8, i16, i32, and i64

llvm::IntegerType * SizeTy

llvm::IntegerType * IntTy

int

CharUnits getSizeAlign() const

llvm::PointerType * UnqualPtrTy

CharUnits getPointerAlign() const

