A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://clang.llvm.org/doxygen/CGOpenMPRuntimeGPU_8cpp_source.html below:

clang: lib/CodeGen/CGOpenMPRuntimeGPU.cpp Source File

22#include "llvm/ADT/SmallPtrSet.h" 23#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" 24#include "llvm/Frontend/OpenMP/OMPGridValues.h" 26using namespace clang

;

27using namespace

CodeGen;

28using namespace

llvm::omp;

33

llvm::FunctionCallee EnterCallee =

nullptr

;

35

llvm::FunctionCallee ExitCallee =

nullptr

;

38

llvm::BasicBlock *ContBlock =

nullptr

;

41

NVPTXActionTy(llvm::FunctionCallee EnterCallee,

43

llvm::FunctionCallee ExitCallee,

45

: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),

50

llvm::Value *CallBool = CGF.

Builder

.CreateIsNotNull(EnterRes);

54

CGF.

Builder

.CreateCondBr(CallBool, ThenBlock, ContBlock);

72class

ExecutionRuntimeModesRAII {

81

: ExecMode(ExecMode) {

82

SavedExecMode = ExecMode;

85

~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }

90 if

(

const auto

*ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {

91 const Expr

*

Base

= ASE->getBase()->IgnoreParenImpCasts();

92 while

(

const auto

*TempASE = dyn_cast<ArraySubscriptExpr>(

Base

))

93 Base

= TempASE->getBase()->IgnoreParenImpCasts();

95

}

else if

(

auto

*OASE = dyn_cast<ArraySectionExpr>(RefExpr)) {

96 const Expr

*

Base

= OASE->getBase()->IgnoreParenImpCasts();

97 while

(

const auto

*TempOASE = dyn_cast<ArraySectionExpr>(

Base

))

98 Base

= TempOASE->getBase()->IgnoreParenImpCasts();

99 while

(

const auto

*TempASE = dyn_cast<ArraySubscriptExpr>(

Base

))

100 Base

= TempASE->getBase()->IgnoreParenImpCasts();

104 if

(

const auto

*DE = dyn_cast<DeclRefExpr>(RefExpr))

105 return

cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());

106 const auto

*ME = cast<MemberExpr>(RefExpr);

107 return

cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());

110static RecordDecl

*buildRecordForGlobalizedVars(

113

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

117 if

(EscapedDecls.empty() && EscapedDeclsForTeams.empty())

121

GlobalizedVars.emplace_back(

C

.getDeclAlign(

D

),

D

);

122 for

(

const ValueDecl

*

D

: EscapedDeclsForTeams)

123

GlobalizedVars.emplace_back(

C

.getDeclAlign(

D

),

D

);

129 RecordDecl

*GlobalizedRD =

C

.buildImplicitRecord(

"_globalized_locals_ty"

);

132

EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());

133 for

(

const auto

&Pair : GlobalizedVars) {

137 Type

=

C

.getPointerType(

Type

.getNonReferenceType());

142 if

(SingleEscaped.count(VD)) {

157

llvm::APInt ArraySize(32, BufSize);

158 Type

=

C

.getConstantArrayType(

Type

, ArraySize,

nullptr

,

159

ArraySizeModifier::Normal, 0);

167

llvm::APInt Align(32, Pair.first.getQuantity());

168 Field

->addAttr(AlignedAttr::CreateImplicit(

171 C

.getIntTypeForBitwidth(32,

0),

173

{}, AlignedAttr::GNU_aligned));

175

GlobalizedRD->

addDecl

(Field);

176

MappedDeclsFields.try_emplace(VD, Field);

183class

CheckVarsEscapingDeclContext final

186

llvm::SetVector<const ValueDecl *> EscapedDecls;

187

llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls;

188

llvm::SetVector<const ValueDecl *> DelayedVariableLengthDecls;

191

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

192 bool

AllEscaped =

false

;

193 bool

IsForCombinedParallelRegion =

false

;

195 void

markAsEscaped(

const ValueDecl

*VD) {

197 if

(!isa<VarDecl>(VD) ||

198

OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))

205 bool

IsCaptured =

false

;

206 if

(

auto

*CSI = CGF.CapturedStmtInfo) {

207 if

(

const FieldDecl

*FD = CSI->lookup(cast<VarDecl>(VD))) {

211 if

(!IsForCombinedParallelRegion) {

214 const auto

*

Attr

= FD->getAttr<OMPCaptureKindAttr>();

217 if

(((

Attr

->getCaptureKind() != OMPC_map) &&

219

((

Attr

->getCaptureKind() == OMPC_map) &&

220

!FD->getType()->isAnyPointerType()))

223 if

(!FD->getType()->isReferenceType()) {

225 "Parameter captured by value with variably modified type"

);

226

EscapedParameters.insert(VD);

227

}

else if

(!IsForCombinedParallelRegion) {

232 if

((!CGF.CapturedStmtInfo ||

233

(IsForCombinedParallelRegion && CGF.CapturedStmtInfo)) &&

241

EscapedVariableLengthDecls.insert(VD);

243

DelayedVariableLengthDecls.insert(VD);

245

EscapedDecls.insert(VD);

248 void

VisitValueDecl(

const ValueDecl

*VD) {

251 if

(

const auto

*VarD = dyn_cast<VarDecl>(VD)) {

252 if

(!isa<ParmVarDecl>(VarD) && VarD->hasInit()) {

253 const bool

SavedAllEscaped = AllEscaped;

255 Visit

(VarD->getInit());

256

AllEscaped = SavedAllEscaped;

262 bool

IsCombinedParallelRegion) {

266 if

(

C

.capturesVariable() && !

C

.capturesVariableByCopy()) {

268 bool

SavedIsForCombinedParallelRegion = IsForCombinedParallelRegion;

269 if

(IsCombinedParallelRegion) {

273

IsForCombinedParallelRegion =

false

;

276 C

->getClauseKind() == OMPC_reduction ||

277 C

->getClauseKind() == OMPC_linear ||

278 C

->getClauseKind() == OMPC_private)

281 if

(

const auto

*PC = dyn_cast<OMPFirstprivateClause>(

C

))

282

Vars = PC->getVarRefs();

283 else if

(

const auto

*PC = dyn_cast<OMPLastprivateClause>(

C

))

284

Vars = PC->getVarRefs();

286

llvm_unreachable(

"Unexpected clause."

);

287 for

(

const auto

*

E

: Vars) {

291

IsForCombinedParallelRegion =

true

;

295 if

(IsForCombinedParallelRegion)

300 if

(isa<OMPCapturedExprDecl>(VD))

302

IsForCombinedParallelRegion = SavedIsForCombinedParallelRegion;

307 void

buildRecordForGlobalizedVars(

bool

IsInTTDRegion) {

308

assert(!GlobalizedRD &&

309 "Record for globalized variables is built already."

);

311 unsigned

WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;

313

EscapedDeclsForTeams = EscapedDecls.getArrayRef();

315

EscapedDeclsForParallel = EscapedDecls.getArrayRef();

316

GlobalizedRD = ::buildRecordForGlobalizedVars(

317

CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,

318

MappedDeclsFields, WarpSize);

324

: CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {

326 virtual

~CheckVarsEscapingDeclContext() =

default

;

327 void

VisitDeclStmt(

const DeclStmt

*S) {

330 for

(

const Decl

*

D

: S->decls())

331 if

(

const auto

*VD = dyn_cast_or_null<ValueDecl>(

D

))

337 if

(!

D

->hasAssociatedStmt())

340

dyn_cast_or_null<CapturedStmt>(

D

->getAssociatedStmt())) {

345 if

(CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) {

346

VisitStmt(S->getCapturedStmt());

349

VisitOpenMPCapturedStmt(

351

CaptureRegions.back() == OMPD_parallel &&

359 if

(

C

.capturesVariable() && !

C

.capturesVariableByCopy()) {

362 if

(isa<OMPCapturedExprDecl>(VD))

371 if

(

C

.capturesVariable()) {

375 if

(

E

->isInitCapture(&

C

) || isa<OMPCapturedExprDecl>(VD))

386 const VarDecl

*VD =

C

.getVariable();

393 void

VisitCallExpr(

const CallExpr

*

E

) {

396 for

(

const Expr

*Arg :

E

->arguments()) {

399 if

(Arg->isLValue()) {

400 const bool

SavedAllEscaped = AllEscaped;

403

AllEscaped = SavedAllEscaped;

416 if

(isa<OMPCapturedExprDecl>(VD))

424 if

(

E

->getOpcode() == UO_AddrOf) {

425 const bool

SavedAllEscaped = AllEscaped;

427 Visit

(

E

->getSubExpr());

428

AllEscaped = SavedAllEscaped;

430 Visit

(

E

->getSubExpr());

436 if

(

E

->getCastKind() == CK_ArrayToPointerDecay) {

437 const bool

SavedAllEscaped = AllEscaped;

439 Visit

(

E

->getSubExpr());

440

AllEscaped = SavedAllEscaped;

442 Visit

(

E

->getSubExpr());

445 void

VisitExpr(

const Expr

*

E

) {

448 bool

SavedAllEscaped = AllEscaped;

454

AllEscaped = SavedAllEscaped;

456 void

VisitStmt(

const Stmt

*S) {

459 for

(

const Stmt

*Child : S->children())

466 const RecordDecl

*getGlobalizedRecord(

bool

IsInTTDRegion) {

468

buildRecordForGlobalizedVars(IsInTTDRegion);

474

assert(GlobalizedRD &&

475 "Record for globalized variables must be generated already."

);

476 return

MappedDeclsFields.lookup(VD);

481 return

EscapedDecls.getArrayRef();

486 const

llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters()

const

{

487 return

EscapedParameters;

493 return

EscapedVariableLengthDecls.getArrayRef();

499 return

DelayedVariableLengthDecls.getArrayRef();

505

CGOpenMPRuntimeGPU::getExecutionMode()

const

{

506 return

CurrentExecutionMode;

510

CGOpenMPRuntimeGPU::getDataSharingMode()

const

{

511 return

CurrentDataSharingMode;

517 const auto

*CS =

D

.getInnermostCapturedStmt();

519

CS->getCapturedStmt()->IgnoreContainers(

true

);

522 if

(

const auto

*NestedDir =

523

dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {

525 switch

(

D

.getDirectiveKind()) {

529 if

(DKind == OMPD_teams) {

530

Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(

535 if

(

const auto

*NND =

536

dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {

537

DKind = NND->getDirectiveKind();

543 case

OMPD_target_teams:

545 case

OMPD_target_simd:

546 case

OMPD_target_parallel:

547 case

OMPD_target_parallel_for:

548 case

OMPD_target_parallel_for_simd:

549 case

OMPD_target_teams_distribute:

550 case

OMPD_target_teams_distribute_simd:

551 case

OMPD_target_teams_distribute_parallel_for:

552 case

OMPD_target_teams_distribute_parallel_for_simd:

555 case

OMPD_parallel_for:

556 case

OMPD_parallel_master:

557 case

OMPD_parallel_sections:

559 case

OMPD_parallel_for_simd:

561 case

OMPD_cancellation_point:

563 case

OMPD_threadprivate:

581 case

OMPD_target_data:

582 case

OMPD_target_exit_data:

583 case

OMPD_target_enter_data:

584 case

OMPD_distribute:

585 case

OMPD_distribute_simd:

586 case

OMPD_distribute_parallel_for:

587 case

OMPD_distribute_parallel_for_simd:

588 case

OMPD_teams_distribute:

589 case

OMPD_teams_distribute_simd:

590 case

OMPD_teams_distribute_parallel_for:

591 case

OMPD_teams_distribute_parallel_for_simd:

592 case

OMPD_target_update:

593 case

OMPD_declare_simd:

594 case

OMPD_declare_variant:

595 case

OMPD_begin_declare_variant:

596 case

OMPD_end_declare_variant:

597 case

OMPD_declare_target:

598 case

OMPD_end_declare_target:

599 case

OMPD_declare_reduction:

600 case

OMPD_declare_mapper:

602 case

OMPD_taskloop_simd:

603 case

OMPD_master_taskloop:

604 case

OMPD_master_taskloop_simd:

605 case

OMPD_parallel_master_taskloop:

606 case

OMPD_parallel_master_taskloop_simd:

610

llvm_unreachable(

"Unexpected directive."

);

620 switch

(DirectiveKind) {

622 case

OMPD_target_teams:

624 case

OMPD_target_parallel_loop:

625 case

OMPD_target_parallel:

626 case

OMPD_target_parallel_for:

627 case

OMPD_target_parallel_for_simd:

628 case

OMPD_target_teams_distribute_parallel_for:

629 case

OMPD_target_teams_distribute_parallel_for_simd:

630 case

OMPD_target_simd:

631 case

OMPD_target_teams_distribute_simd:

633 case

OMPD_target_teams_distribute:

635 case

OMPD_target_teams_loop:

638 if

(

auto

*TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&

D

))

639 return

TTLD->canBeParallelFor();

643 case

OMPD_parallel_for:

644 case

OMPD_parallel_master:

645 case

OMPD_parallel_sections:

647 case

OMPD_parallel_for_simd:

649 case

OMPD_cancellation_point:

651 case

OMPD_threadprivate:

669 case

OMPD_target_data:

670 case

OMPD_target_exit_data:

671 case

OMPD_target_enter_data:

672 case

OMPD_distribute:

673 case

OMPD_distribute_simd:

674 case

OMPD_distribute_parallel_for:

675 case

OMPD_distribute_parallel_for_simd:

676 case

OMPD_teams_distribute:

677 case

OMPD_teams_distribute_simd:

678 case

OMPD_teams_distribute_parallel_for:

679 case

OMPD_teams_distribute_parallel_for_simd:

680 case

OMPD_target_update:

681 case

OMPD_declare_simd:

682 case

OMPD_declare_variant:

683 case

OMPD_begin_declare_variant:

684 case

OMPD_end_declare_variant:

685 case

OMPD_declare_target:

686 case

OMPD_end_declare_target:

687 case

OMPD_declare_reduction:

688 case

OMPD_declare_mapper:

690 case

OMPD_taskloop_simd:

691 case

OMPD_master_taskloop:

692 case

OMPD_master_taskloop_simd:

693 case

OMPD_parallel_master_taskloop:

694 case

OMPD_parallel_master_taskloop_simd:

701 "Unknown programming model for OpenMP directive on NVPTX target."

);

705

StringRef ParentName,

706

llvm::Function *&OutlinedFn,

707

llvm::Constant *&OutlinedFnID,

710

ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,

EM_NonSPMD

);

711

EntryFunctionState EST;

712

WrapperFunctionsMap.clear();

714

[[maybe_unused]]

bool

IsBareKernel =

D

.getSingleClause<

OMPXBareClause

>();

715

assert(!IsBareKernel &&

"bare kernel should not be at generic mode"

);

719

CGOpenMPRuntimeGPU::EntryFunctionState &EST;

723

NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,

725

: EST(EST),

D

(

D

) {}

728

RT.emitKernelInit(

D

, CGF, EST,

false

);

730

RT.setLocThreadIdInsertPt(CGF,

true

);

735

RT.emitKernelDeinit(CGF, EST,

false

);

739

IsInTTDRegion =

true

;

741

IsOffloadEntry, CodeGen);

742

IsInTTDRegion =

false

;

747

EntryFunctionState &EST,

bool

IsSPMD) {

748

llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;

750

IsSPMD ? llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD

751

: llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;

755

Bld.restoreIP(

OMPBuilder

.createTargetInit(Bld, Attrs));

757

emitGenericVarsProlog(CGF, EST.Loc);

761

EntryFunctionState &EST,

764

emitGenericVarsEpilog(CGF);

769 "_openmp_teams_reduction_type_$_"

, RecordDecl::TagKind::Union);

771 for

(

const RecordDecl

*TeamReductionRec : TeamsReductions) {

772 QualType

RecTy =

C

.getRecordType(TeamReductionRec);

782 QualType

StaticTy =

C

.getRecordType(StaticRD);

783

llvm::Type *LLVMReductionsBufferTy =

787

TeamsReductions.empty()

789

: DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue();

791 OMPBuilder

.createTargetDeinit(Bld, ReductionDataSize,

792 C

.getLangOpts().OpenMPCUDAReductionBufNum);

793

TeamsReductions.clear();

797

StringRef ParentName,

798

llvm::Function *&OutlinedFn,

799

llvm::Constant *&OutlinedFnID,

802

ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode,

EM_SPMD

);

803

EntryFunctionState EST;

810

CGOpenMPRuntimeGPU::EntryFunctionState &EST;

812

DataSharingMode Mode;

817

CGOpenMPRuntimeGPU::EntryFunctionState &EST,

819

: RT(RT), EST(EST), IsBareKernel(IsBareKernel),

820

Mode(RT.CurrentDataSharingMode),

D

(

D

) {}

823

RT.CurrentDataSharingMode = DataSharingMode::DS_CUDA;

826

RT.emitKernelInit(

D

, CGF, EST,

true

);

828

RT.setLocThreadIdInsertPt(CGF,

true

);

832

RT.CurrentDataSharingMode = Mode;

835

RT.clearLocThreadIdInsertPt(CGF);

836

RT.emitKernelDeinit(CGF, EST,

true

);

838

} Action(*

this

, EST, IsBareKernel,

D

);

840

IsInTTDRegion =

true

;

842

IsOffloadEntry, CodeGen);

843

IsInTTDRegion =

false

;

846void

CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(

848

llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,

853

assert(!ParentName.empty() &&

"Invalid target region parent name!"

);

857 if

(Mode || IsBareKernel)

858

emitSPMDKernel(

D

, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

861

emitNonSPMDKernel(

D

, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,

867

llvm::OpenMPIRBuilderConfig Config(

875

llvm_unreachable(

"OpenMP can only handle device code."

);

885 "__omp_rtl_debug_kind"

);

887 "__omp_rtl_assume_teams_oversubscription"

);

889 "__omp_rtl_assume_threads_oversubscription"

);

891 "__omp_rtl_assume_no_thread_state"

);

893 "__omp_rtl_assume_no_nested_parallelism"

);

897

ProcBindKind ProcBind,

903

llvm::Value *NumThreads,

909 const Expr

*NumTeams,

910 const Expr

*ThreadLimit,

918 bool

PrevIsInTTDRegion = IsInTTDRegion;

919

IsInTTDRegion =

false

;

922

CGF,

D

, ThreadIDVar, InnermostKind, CodeGen));

923

IsInTTDRegion = PrevIsInTTDRegion;

925

llvm::Function *WrapperFun =

926

createParallelDataSharingWrapper(OutlinedFun,

D

);

927

WrapperFunctionsMap[OutlinedFun] = WrapperFun;

939 "expected teams directive."

);

944 D

.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(

946

Dir = dyn_cast_or_null<OMPExecutableDirective>(S);

954 for

(

const Expr

*

E

:

C

->getVarRefs())

964 "expected teams directive."

);

966 for

(

const Expr

*

E

:

C

->privates())

979

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;

986 if

(!LastPrivatesReductions.empty()) {

987

GlobalizedRD = ::buildRecordForGlobalizedVars(

988 CGM

.

getContext

(), {}, LastPrivatesReductions, MappedDeclsFields,

991

}

else if

(!LastPrivatesReductions.empty()) {

992

assert(!TeamAndReductions.first &&

993 "Previous team declaration is not expected."

);

994

TeamAndReductions.first =

D

.getCapturedStmt(OMPD_teams)->getCapturedDecl();

995

std::swap(TeamAndReductions.second, LastPrivatesReductions);

1002

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1006

NVPTXPrePostActionTy(

1008

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>

1010

:

Loc

(

Loc

), GlobalizedRD(GlobalizedRD),

1011

MappedDeclsFields(MappedDeclsFields) {}

1016 auto

I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.

CurFn

).first;

1017

I->getSecond().MappedParams =

1018

std::make_unique<CodeGenFunction::OMPMapVars>();

1019

DeclToAddrMapTy &

Data

= I->getSecond().LocalVarData;

1020 for

(

const auto

&Pair : MappedDeclsFields) {

1021

assert(Pair.getFirst()->isCanonicalDecl() &&

1022 "Expected canonical declaration"

);

1023 Data

.insert(std::make_pair(Pair.getFirst(), MappedVarData()));

1026

Rt.emitGenericVarsProlog(CGF,

Loc

);

1030

.emitGenericVarsEpilog(CGF);

1032

} Action(

Loc

, GlobalizedRD, MappedDeclsFields);

1035

CGF,

D

, ThreadIDVar, InnermostKind, CodeGen);

1047 const auto

I = FunctionGlobalizedDecls.find(CGF.

CurFn

);

1048 if

(I == FunctionGlobalizedDecls.end())

1051 for

(

auto

&Rec : I->getSecond().LocalVarData) {

1052 const auto

*VD = cast<VarDecl>(Rec.first);

1053 bool

EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);

1057

llvm::Value *ParValue;

1066

llvm::CallBase *VoidPtr =

1071

VoidPtr->addRetAttr(llvm::Attribute::get(

1077

VoidPtr, Bld.getPtrTy(0), VD->

getName

() +

"_on_stack"

);

1080

Rec.second.PrivateAddr = VarAddr.

getAddress

();

1081

Rec.second.GlobalizedVal = VoidPtr;

1086

I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.

getAddress

());

1089

VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->

getLocation

()));

1092 for

(

const auto

*ValueD : I->getSecond().EscapedVariableLengthDecls) {

1093 const auto

*VD = cast<VarDecl>(ValueD);

1094

std::pair<llvm::Value *, llvm::Value *> AddrSizePair =

1096

I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(AddrSizePair);

1100

I->getSecond().MappedParams->setVarAddr(CGF, VD,

Base

.getAddress());

1102

I->getSecond().MappedParams->apply(CGF);

1107 const auto

I = FunctionGlobalizedDecls.find(CGF.

CurFn

);

1108 if

(I == FunctionGlobalizedDecls.end())

1112 return

llvm::is_contained(I->getSecond().DelayedVariableLengthDecls, VD);

1115

std::pair<llvm::Value *, llvm::Value *>

1123

Size = Bld.CreateNUWAdd(

1125

llvm::Value *AlignVal =

1127

Size = Bld.CreateUDiv(Size, AlignVal);

1128

Size = Bld.CreateNUWMul(Size, AlignVal);

1131

llvm::Value *AllocArgs[] = {Size};

1132

llvm::CallBase *VoidPtr =

1136

VoidPtr->addRetAttr(llvm::Attribute::get(

1139 return

std::make_pair(VoidPtr, Size);

1144 const

std::pair<llvm::Value *, llvm::Value *> &AddrSizePair) {

1148

{AddrSizePair.first, AddrSizePair.second});

1151void

CGOpenMPRuntimeGPU::emitGenericVarsEpilog(

CodeGenFunction

&CGF) {

1155 const auto

I = FunctionGlobalizedDecls.find(CGF.

CurFn

);

1156 if

(I != FunctionGlobalizedDecls.end()) {

1159 for

(

const auto

&AddrSizePair :

1160

llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {

1163

{AddrSizePair.first, AddrSizePair.second});

1166 for

(

auto

&Rec : llvm::reverse(I->getSecond().LocalVarData)) {

1167 const auto

*VD = cast<VarDecl>(Rec.first);

1168

I->getSecond().MappedParams->restore(CGF);

1170

llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal,

1182

llvm::Function *OutlinedFn,

1196

OutlinedFnArgs.push_back(llvm::ConstantPointerNull::get(

CGM

.

VoidPtrTy

));

1199

OutlinedFnArgs.push_back(ZeroAddr.getPointer());

1200

OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

1206

llvm::Function *OutlinedFn,

1209

llvm::Value *NumThreads) {

1213 auto

&&ParallelGen = [

this

,

Loc

, OutlinedFn, CapturedVars, IfCond,

1217

llvm::Value *NumThreadsVal = NumThreads;

1218

llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];

1219

llvm::Value *ID = llvm::ConstantPointerNull::get(

CGM

.

Int8PtrTy

);

1221

ID = Bld.CreateBitOrPointerCast(WFn,

CGM

.

Int8PtrTy

);

1222

llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn,

CGM

.

Int8PtrTy

);

1230

llvm::ArrayType::get(

CGM

.

VoidPtrTy

, CapturedVars.size()),

1231 "captured_vars_addrs"

);

1233 if

(!CapturedVars.empty()) {

1237 for

(llvm::Value *

V

: CapturedVars) {

1240 if

(

V

->getType()->isIntegerTy())

1241

PtrV = Bld.CreateIntToPtr(

V

, CGF.

VoidPtrTy

);

1250

llvm::Value *IfCondVal =

nullptr

;

1255

IfCondVal = llvm::ConstantInt::get(CGF.

Int32Ty

, 1);

1258

NumThreadsVal = llvm::ConstantInt::get(CGF.

Int32Ty

, -1);

1260

NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.

Int32Ty

),

1262

assert(IfCondVal &&

"Expected a value"

);

1264

llvm::Value *Args[] = {

1269

llvm::ConstantInt::get(CGF.

Int32Ty

, -1),

1272

Bld.CreateBitOrPointerCast(CapturedVarsAddrs.

emitRawPointer

(CGF),

1274

llvm::ConstantInt::get(

CGM

.

SizeTy

, CapturedVars.size())};

1290

llvm::Value *Args[] = {

1291

llvm::ConstantPointerNull::get(

1293

llvm::ConstantInt::get(CGF.

Int32Ty

,

0,

true

)};

1295 CGM

.

getModule

(), OMPRTL___kmpc_barrier_simple_spmd),

1330 CGM

.

getModule

(), OMPRTL___kmpc_warp_active_thread_mask));

1332

llvm::Value *ThreadID = RT.getGPUThreadID(CGF);

1335

llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF);

1348

llvm::Value *CmpLoopBound = CGF.

Builder

.CreateICmpSLT(CounterVal, TeamWidth);

1349

CGF.

Builder

.CreateCondBr(CmpLoopBound, TestBB, ExitBB);

1355

llvm::Value *CmpThreadToCounter =

1356

CGF.

Builder

.CreateICmpEQ(ThreadID, CounterVal);

1357

CGF.

Builder

.CreateCondBr(CmpThreadToCounter, BodyBB, SyncBB);

1376

llvm::Value *IncCounterVal =

1377

CGF.

Builder

.CreateNSWAdd(CounterVal, CGF.

Builder

.getInt32(1));

1390 "Cast type must sized."

);

1392 "Val type must sized."

);

1394 if

(ValTy == CastTy)

1398 return

CGF.

Builder

.CreateBitCast(Val, LLVMCastTy);

1400 return

CGF.

Builder

.CreateIntCast(Val, LLVMCastTy,

1667 if

(Options.SimpleReduction) {

1668

assert(!TeamsReduction && !ParallelReduction &&

1669 "Invalid reduction selection in emitReduction."

);

1670

(void)ParallelReduction;

1672

ReductionOps, Options);

1676

llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap;

1679 for

(

const Expr

*DRE : Privates) {

1680

PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl();

1683 const RecordDecl

*ReductionRec = ::buildRecordForGlobalizedVars(

1684 CGM

.

getContext

(), PrivatesReductions, {}, VarFieldMap, 1);

1687

TeamsReductions.push_back(ReductionRec);

1692 using

InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

1695

InsertPointTy CodeGenIP(CGF.

Builder

.GetInsertBlock(),

1696

CGF.

Builder

.GetInsertPoint());

1697

llvm::OpenMPIRBuilder::LocationDescription OmpLoc(

1704

llvm::Type *ElementType;

1706

llvm::Value *PrivateVariable;

1707

llvm::OpenMPIRBuilder::ReductionGenAtomicCBTy AtomicReductionGen =

nullptr

;

1709 const auto

*RHSVar =

1710

cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[Idx])->getDecl());

1712 const auto

*LHSVar =

1713

cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[Idx])->getDecl());

1715

llvm::OpenMPIRBuilder::EvalKind EvalKind;

1718

EvalKind = llvm::OpenMPIRBuilder::EvalKind::Scalar;

1721

EvalKind = llvm::OpenMPIRBuilder::EvalKind::Complex;

1724

EvalKind = llvm::OpenMPIRBuilder::EvalKind::Aggregate;

1727 auto

ReductionGen = [&](InsertPointTy CodeGenIP,

unsigned

I,

1728

llvm::Value **LHSPtr, llvm::Value **RHSPtr,

1729

llvm::Function *NewFunc) {

1730

CGF.

Builder

.restoreIP(CodeGenIP);

1731 auto

*CurFn = CGF.

CurFn

;

1732

CGF.

CurFn

= NewFunc;

1735

cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()))

1736

.emitRawPointer(CGF);

1738

cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()))

1739

.emitRawPointer(CGF);

1742

cast<DeclRefExpr>(LHSExprs[I]),

1743

cast<DeclRefExpr>(RHSExprs[I]));

1747 return

InsertPointTy(CGF.

Builder

.GetInsertBlock(),

1748

CGF.

Builder

.GetInsertPoint());

1750

ReductionInfos.emplace_back(llvm::OpenMPIRBuilder::ReductionInfo(

1751

ElementType,

Variable

, PrivateVariable, EvalKind,

1752 nullptr

, ReductionGen, AtomicReductionGen));

1756

llvm::OpenMPIRBuilder::InsertPointTy AfterIP =

1758

OmpLoc, AllocaIP, CodeGenIP, ReductionInfos,

false

, TeamsReduction,

1759

DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,

1761 C

.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc));

1762

CGF.

Builder

.restoreIP(AfterIP);

1768 const VarDecl

*NativeParam)

const

{

1773 const Type

*NonQualTy = QC.

strip

(ArgType);

1775 if

(

const auto

*

Attr

= FD->

getAttr

<OMPCaptureKindAttr>()) {

1776 if

(

Attr

->getCaptureKind() == OMPC_map) {

1783 enum

{ NVPTX_local_addr = 5 };

1786 if

(isa<ImplicitParamDecl>(NativeParam))

1795 nullptr

,

SC_None

,

nullptr

);

1801 const VarDecl

*TargetParam)

const

{

1802

assert(NativeParam != TargetParam &&

1804 "Native arg must not be the same as target arg."

);

1808 const Type

*NonQualTy = QC.

strip

(NativeParamType);

1810 unsigned

NativePointeeAddrSpace =

1818

llvm::PointerType::get(CGF.

getLLVMContext

(), NativePointeeAddrSpace));

1822 return

NativeParamAddr;

1829

TargetArgs.reserve(Args.size());

1830 auto

*FnType = OutlinedFn.getFunctionType();

1831 for

(

unsigned

I = 0,

E

= Args.size(); I <

E

; ++I) {

1832 if

(FnType->isVarArg() && FnType->getNumParams() <= I) {

1833

TargetArgs.append(std::next(Args.begin(), I), Args.end());

1836

llvm::Type *TargetType = FnType->getParamType(I);

1837

llvm::Value *NativeArg = Args[I];

1838 if

(!TargetType->isPointerTy()) {

1839

TargetArgs.emplace_back(NativeArg);

1842

TargetArgs.emplace_back(

1852

llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(

1855 const auto

&CS = *

D

.getCapturedStmt(OMPD_parallel);

1869

WrapperArgs.emplace_back(&ParallelLevelArg);

1870

WrapperArgs.emplace_back(&WrapperArg);

1875 auto

*Fn = llvm::Function::Create(

1877

Twine(OutlinedParallelFn->getName(),

"_wrapper"

), &

CGM

.

getModule

());

1885

Fn->addFnAttr(llvm::Attribute::NoInline);

1888

Fn->setLinkage(llvm::GlobalValue::InternalLinkage);

1889

Fn->setDoesNotRecurse();

1895 const auto

*RD = CS.getCapturedRecordDecl();

1896 auto

CurField = RD->field_begin();

1908 auto

CI = CS.capture_begin();

1914

llvm::Value *GlobalArgsPtr = GlobalArgs.

getPointer

();

1915

llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};

1917 CGM

.

getModule

(), OMPRTL___kmpc_get_shared_variables),

1923 if

(CS.capture_size() > 0 ||

1934

Src, Bld.getPtrTy(0), CGF.

SizeTy

);

1939

cast<OMPLoopDirective>(

D

).getLowerBoundVariable()->getExprLoc());

1940

Args.emplace_back(LB);

1949

cast<OMPLoopDirective>(

D

).getUpperBoundVariable()->getExprLoc());

1950

Args.emplace_back(UB);

1953 if

(CS.capture_size() > 0) {

1955 for

(

unsigned

I = 0,

E

= CS.capture_size(); I <

E

; ++I, ++CI, ++CurField) {

1956 QualType

ElemTy = CurField->getType();

1965 if

(CI->capturesVariableByCopy() &&

1966

!CI->getCapturedVar()->getType()->isAnyPointerType()) {

1970

Args.emplace_back(Arg);

1984

assert(

D

&&

"Expected function or captured|block decl."

);

1985

assert(FunctionGlobalizedDecls.count(CGF.

CurFn

) == 0 &&

1986 "Function is registered already."

);

1987

assert((!TeamAndReductions.first || TeamAndReductions.first ==

D

) &&

1988 "Team is set but not processed."

);

1989 const Stmt

*Body =

nullptr

;

1990 bool

NeedToDelayGlobalization =

false

;

1991 if

(

const auto

*FD = dyn_cast<FunctionDecl>(

D

)) {

1992

Body = FD->getBody();

1993

}

else if

(

const auto

*BD = dyn_cast<BlockDecl>(

D

)) {

1994

Body = BD->getBody();

1995

}

else if

(

const auto

*CD = dyn_cast<CapturedDecl>(

D

)) {

1996

Body = CD->getBody();

1998 if

(NeedToDelayGlobalization &&

2004

CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);

2005

VarChecker.Visit(Body);

2007

VarChecker.getGlobalizedRecord(IsInTTDRegion);

2008

TeamAndReductions.first =

nullptr

;

2009

TeamAndReductions.second.clear();

2011

VarChecker.getEscapedVariableLengthDecls();

2013

VarChecker.getDelayedVariableLengthDecls();

2014 if

(!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty() &&

2015

DelayedVariableLengthDecls.empty())

2017 auto

I = FunctionGlobalizedDecls.try_emplace(CGF.

CurFn

).first;

2018

I->getSecond().MappedParams =

2019

std::make_unique<CodeGenFunction::OMPMapVars>();

2020

I->getSecond().EscapedParameters.insert(

2021

VarChecker.getEscapedParameters().begin(),

2022

VarChecker.getEscapedParameters().end());

2023

I->getSecond().EscapedVariableLengthDecls.append(

2024

EscapedVariableLengthDecls.begin(), EscapedVariableLengthDecls.end());

2025

I->getSecond().DelayedVariableLengthDecls.append(

2026

DelayedVariableLengthDecls.begin(), DelayedVariableLengthDecls.end());

2027

DeclToAddrMapTy &

Data

= I->getSecond().LocalVarData;

2028 for

(

const ValueDecl

*VD : VarChecker.getEscapedDecls()) {

2030 Data

.insert(std::make_pair(VD, MappedVarData()));

2032 if

(!NeedToDelayGlobalization) {

2035

GlobalizationScope() =

default

;

2039

.emitGenericVarsEpilog(CGF);

2048 if

(VD && VD->

hasAttr

<OMPAllocateDeclAttr>()) {

2049 const auto

*A = VD->

getAttr

<OMPAllocateDeclAttr>();

2051 switch

(A->getAllocatorType()) {

2052 case

OMPAllocateDeclAttr::OMPNullMemAlloc:

2053 case

OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2054 case

OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2055 case

OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2057 case

OMPAllocateDeclAttr::OMPThreadMemAlloc:

2059 case

OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2062 case

OMPAllocateDeclAttr::OMPConstMemAlloc:

2065 case

OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2068 case

OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2069 case

OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2073 auto

*GV =

new

llvm::GlobalVariable(

2075

llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(VarTy),

2077 nullptr

, llvm::GlobalValue::NotThreadLocal,

2092 auto

I = FunctionGlobalizedDecls.find(CGF.

CurFn

);

2093 if

(I == FunctionGlobalizedDecls.end())

2095 auto

VDI = I->getSecond().LocalVarData.find(VD);

2096 if

(VDI != I->getSecond().LocalVarData.end())

2097 return

VDI->second.PrivateAddr;

2102 auto

VDI = I->getSecond().LocalVarData.find(

2103

cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())

2104

->getCanonicalDecl());

2105 if

(VDI != I->getSecond().LocalVarData.end())

2106 return

VDI->second.PrivateAddr;

2114

FunctionGlobalizedDecls.erase(CGF.

CurFn

);

2121

llvm::Value *&Chunk)

const

{

2124

ScheduleKind = OMPC_DIST_SCHEDULE_static;

2126

RT.getGPUNumThreads(CGF),

2128

S.getIterationVariable()->getType(), S.getBeginLoc());

2132

CGF, S, ScheduleKind, Chunk);

2138 const Expr

*&ChunkExpr)

const

{

2139

ScheduleKind = OMPC_SCHEDULE_static;

2141

llvm::APInt ChunkSize(32, 1);

2150 " Expected target-based directive."

);

2155 if

(!

C

.capturesVariable())

2157 const VarDecl

*VD =

C

.getCapturedVar();

2158 const auto

*RD = VD->

getType

()

2162 if

(!RD || !RD->isLambda())

2171

llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;

2173

RD->getCaptureFields(Captures, ThisCapture);

2183 const ValueDecl

*VD = LC.getCapturedVar();

2188 auto

It = Captures.find(VD);

2189

assert(It != Captures.end() &&

"Found lambda capture without field."

);

2203 if

(!VD || !VD->

hasAttr

<OMPAllocateDeclAttr>())

2205 const auto

*A = VD->

getAttr

<OMPAllocateDeclAttr>();

2206 switch

(A->getAllocatorType()) {

2207 case

OMPAllocateDeclAttr::OMPNullMemAlloc:

2208 case

OMPAllocateDeclAttr::OMPDefaultMemAlloc:

2210 case

OMPAllocateDeclAttr::OMPLargeCapMemAlloc:

2211 case

OMPAllocateDeclAttr::OMPCGroupMemAlloc:

2212 case

OMPAllocateDeclAttr::OMPHighBWMemAlloc:

2213 case

OMPAllocateDeclAttr::OMPLowLatMemAlloc:

2214 case

OMPAllocateDeclAttr::OMPThreadMemAlloc:

2217 case

OMPAllocateDeclAttr::OMPConstMemAlloc:

2220 case

OMPAllocateDeclAttr::OMPPTeamMemAlloc:

2223 case

OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:

2224

llvm_unreachable(

"Expected predefined allocator for the variables with the " 2225 "static storage."

);

2235 if

(Feature.getValue()) {

2247 for

(

const OMPClause

*Clause :

D

->clauselists()) {

2248 if

(Clause->getClauseKind() == OMPC_unified_shared_memory) {

2261

llvm::raw_svector_ostream Out(Buffer);

2263

<<

" does not support unified addressing"

;

2264 CGM

.

Error

(Clause->getBeginLoc(), Out.str());

2340

llvm_unreachable(

"Unexpected GPU arch."

);

2350 const char

*LocSize =

"__kmpc_get_hardware_num_threads_in_block"

;

2351

llvm::Function *F = M->getFunction(LocSize);

2353

F = llvm::Function::Create(llvm::FunctionType::get(CGF.

Int32Ty

, {},

false

),

2354

llvm::GlobalVariable::ExternalLinkage, LocSize,

2357 return

Bld.CreateCall(F, {},

"nvptx_num_threads"

);

2364 CGM

.

getModule

(), OMPRTL___kmpc_get_hardware_thread_id_in_block),

static void getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of reduction variables from the teams ... directives.

static llvm::Value * castValueToType(CodeGenFunction &CGF, llvm::Value *Val, QualType ValTy, QualType CastTy, SourceLocation Loc)

Cast value to the specified type.

static void getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, llvm::SmallVectorImpl< const ValueDecl * > &Vars)

Get list of lastprivate variables from the teams distribute ... or teams {distribute ....

static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D)

Check for inner (nested) SPMD construct, if any.

static bool supportsSPMDExecutionMode(ASTContext &Ctx, const OMPExecutableDirective &D)

static OffloadArch getOffloadArch(CodeGenModule &CGM)

This file defines OpenMP nodes for declarative directives.

This file defines OpenMP AST classes for clauses.

static std::pair< ValueDecl *, bool > getPrivateItem(Sema &S, Expr *&RefExpr, SourceLocation &ELoc, SourceRange &ERange, bool AllowArraySection=false, StringRef DiagType="")

This file defines OpenMP AST classes for executable directives and clauses.

Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...

QualType getPointerType(QualType T) const

Return the uniqued reference to the type for a pointer to the specified type.

QualType getUIntPtrType() const

Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.

QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const

getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...

CanQualType getSizeType() const

Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.

CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const

Return a conservative estimate of the alignment of the specified decl D.

CharUnits getTypeSizeInChars(QualType T) const

Return the size of the specified (complete) type T, in characters.

const TargetInfo & getTargetInfo() const

QualType getAddrSpaceQualType(QualType T, LangAS AddressSpace) const

Return the uniqued reference to the type for an address space qualified type with the specified type ...

unsigned getTargetAddressSpace(LangAS AS) const

Attr - This represents one attribute.

A class which contains all the information about a particular captured value.

BlockExpr - Adaptor class for mixing a BlockDecl with expressions.

CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).

Describes the capture of either a variable, or 'this', or variable-length array type.

This captures a statement into a function.

bool capturesVariable(const VarDecl *Var) const

True if this variable has been captured.

CharUnits - This is an opaque type for sizes expressed in character units.

bool isZero() const

isZero - Test whether the quantity equals zero.

llvm::Align getAsAlign() const

getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...

QuantityType getQuantity() const

getQuantity - Get the raw integer representation of this quantity.

Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...

llvm::Value * emitRawPointer(CodeGenFunction &CGF) const

Return the pointer contained in this class after authenticating it and adding offset to it if necessa...

Address withElementType(llvm::Type *ElemTy) const

Return address with different element type, but same pointer and alignment.

llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)

Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")

Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...

Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")

Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...

CGFunctionInfo - Class to encapsulate the information about a function definition.

void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override

Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...

llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP teams.

void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override

Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...

void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override

Emit a code for reduction clause.

DataSharingMode

Target codegen is specialized based on two data-sharing modes: CUDA, in which the local variables are...

@ DS_CUDA

CUDA data sharing mode.

@ DS_Generic

Generic data-sharing mode.

void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const override

Choose a default value for the dist_schedule clause.

Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) override

Gets the OpenMP-specific address of the local variable.

void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override

Emits OpenMP-specific function prolog.

void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const override

Choose a default value for the schedule clause.

void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override

This function ought to emit, in the general case, a call to.

void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override

Emits a critical region.

void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override

Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...

bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override

Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...

void getKmpcFreeShared(CodeGenFunction &CGF, const std::pair< llvm::Value *, llvm::Value * > &AddrSizePair) override

Get call to __kmpc_free_shared.

CGOpenMPRuntimeGPU(CodeGenModule &CGM)

llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override

Emits inlined function for the specified OpenMP parallel.

void functionFinished(CodeGenFunction &CGF) override

Cleans up references to the objects in finished function.

llvm::Value * getGPUThreadID(CodeGenFunction &CGF)

Get the id of the current thread on the GPU.

void processRequiresDirective(const OMPRequiresDecl *D) override

Perform check on requires decl to ensure that target architecture supports unified addressing.

bool isDelayedVariableLengthDecl(CodeGenFunction &CGF, const VarDecl *VD) const override

Declare generalized virtual functions which need to be defined by all specializations of OpenMPGPURun...

void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const override

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override

Gets the address of the native argument basing on the address of the target-specific parameter.

ExecutionMode

Defines the execution mode.

@ EM_NonSPMD

Non-SPMD execution mode (1 master thread, others are workers).

@ EM_Unknown

Unknown execution mode (orphaned directive).

@ EM_SPMD

SPMD execution mode (all threads are worker threads).

void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override

Emit an implicit/explicit barrier for OpenMP threads.

llvm::Value * getGPUNumThreads(CodeGenFunction &CGF)

Get the maximum number of threads in a block of the GPU.

const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override

Translates the native parameter of outlined function if this is required for target.

std::pair< llvm::Value *, llvm::Value * > getKmpcAllocShared(CodeGenFunction &CGF, const VarDecl *VD) override

Get call to __kmpc_alloc_shared.

bool isGPU() const override

Returns true if the current target is a GPU.

void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override

Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...

void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const override

Adjust some parameters for the target-based directives, like addresses of the variables captured by r...

virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)

Emits address of the word in a memory where current thread id is stored.

static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)

Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...

llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)

Emits object of ident_t type with info for source location.

virtual void functionFinished(CodeGenFunction &CGF)

Cleans up references to the objects in finished function.

virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP teams directive D.

llvm::OpenMPIRBuilder OMPBuilder

An OpenMP-IR-Builder instance.

virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)

Helper to emit outlined function for 'target' directive.

bool hasRequiresUnifiedSharedMemory() const

Return whether the unified_shared_memory has been specified.

virtual void processRequiresDirective(const OMPRequiresDecl *D)

Perform check on requires decl to ensure that target architecture supports unified addressing.

llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)

Gets thread id value for the current thread.

void clearLocThreadIdInsertPt(CodeGenFunction &CGF)

void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)

Helper to determine the min/max number of threads/teams for D.

static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)

Returns default flags for the barriers depending on the directive, for which this barier is going to ...

virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)

Emits outlined function for the specified OpenMP parallel directive D.

virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const

Choose default schedule type and chunk value for the dist_schedule clause.

llvm::Type * getIdentTyPointerTy()

Returns pointer to ident_t type.

void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)

Emits single reduction combiner.

llvm::OpenMPIRBuilder & getOMPBuilder()

virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)

Emits a critical region.

virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const

Emits call of the outlined function with the provided arguments, translating these arguments to corre...

virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)

Emit a code for reduction clause.

CapturedRegionKind getKind() const

bool isCXXThisExprCaptured() const

The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...

CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...

void FinishFunction(SourceLocation EndLoc=SourceLocation())

FinishFunction - Complete IR generation of the current function.

static TypeEvaluationKind getEvaluationKind(QualType T)

getEvaluationKind - Return the TypeEvaluationKind of QualType T.

CGCapturedStmtInfo * CapturedStmtInfo

LValue MakeNaturalAlignPointeeRawAddrLValue(llvm::Value *V, QualType T)

Same as MakeNaturalAlignPointeeAddrLValue except that the pointer is known to be unsigned.

Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)

Load a pointer with type PtrTy stored at address Ptr.

RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")

CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...

llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)

createBasicBlock - Create an LLVM basic block.

LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)

EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...

void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)

EmitBlock - Emit the given block.

llvm::Type * ConvertTypeForMem(QualType T)

llvm::AssertingVH< llvm::Instruction > AllocaInsertPt

AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...

RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)

CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...

const TargetInfo & getTarget() const

llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location)

Converts Location to a DebugLoc, if debug information is enabled.

llvm::Value * getTypeSize(QualType Ty)

Returns calculated size of the specified type.

void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())

Emit code for the start of a function.

bool HaveInsertPoint() const

HaveInsertPoint - True if an insertion point is defined.

CGDebugInfo * getDebugInfo()

void EmitBranch(llvm::BasicBlock *Block)

EmitBranch - Emit a branch to the specified basic block from the current insert block,...

ASTContext & getContext() const

llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)

EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...

llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")

CodeGenTypes & getTypes() const

llvm::Value * EvaluateExprAsBool(const Expr *E)

EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...

LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)

llvm::Value * LoadCXXThis()

LoadCXXThis - Load the value of 'this'.

LValue EmitLoadOfReferenceLValue(LValue RefLVal)

Address GetAddrOfLocalVar(const VarDecl *VD)

GetAddrOfLocalVar - Return the address of a local variable.

llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)

Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...

llvm::LLVMContext & getLLVMContext()

void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)

EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...

This class organizes the cross-function state that is used while generating LLVM code.

void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)

Set the attributes on the LLVM function for the given decl and function info.

llvm::Module & getModule() const

const LangOptions & getLangOpts() const

CodeGenTypes & getTypes()

const TargetInfo & getTarget() const

void Error(SourceLocation loc, StringRef error)

Emit a general error that something can't be done.

CGOpenMPRuntime & getOpenMPRuntime()

Return a reference to the configured OpenMP runtime.

ASTContext & getContext() const

llvm::LLVMContext & getLLVMContext()

llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)

GetFunctionType - Get the LLVM function type for.

const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)

A builtin function is a freestanding function using the default C conventions.

unsigned getTargetAddressSpace(QualType T) const

llvm::Type * ConvertTypeForMem(QualType T)

ConvertTypeForMem - Convert type T into a llvm::Type.

Information for lazily generating a cleanup.

FunctionArgList - Type for representing both the decl and type of parameters to a function.

LValue - This represents an lvalue references.

Address getAddress() const

A basic class for pre|post-action for advanced codegen sequence for OpenMP region.

An abstract representation of an aligned address.

llvm::Value * getPointer() const

Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...

void setAction(PrePostActionTy &Action) const

ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.

DeclContext - This is used only as base class of specific decl types that can act as declaration cont...

void addDecl(Decl *D)

Add the declaration D into this context.

A reference to a declared variable, function, enum, etc.

DeclStmt - Adaptor class for mixing declarations with statements and expressions.

Decl - This represents one declaration (or definition), e.g.

attr_iterator attr_end() const

bool isCanonicalDecl() const

Whether this particular Decl is a canonical one.

attr_iterator attr_begin() const

SourceLocation getLocation() const

DeclContext * getDeclContext()

SourceLocation getBeginLoc() const LLVM_READONLY

virtual Decl * getCanonicalDecl()

Retrieves the "canonical" declaration of the given declaration.

SourceLocation getBeginLoc() const LLVM_READONLY

This represents one expression.

Expr * IgnoreParenImpCasts() LLVM_READONLY

Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...

Expr * IgnoreParens() LLVM_READONLY

Skip past any parentheses which might surround this expression until reaching a fixed point.

bool isLValue() const

isLValue - True if this expression is an "l-value" according to the rules of the current language.

Represents a member of a struct/union/class.

static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)

GlobalDecl - represents a global declaration.

ImplicitCastExpr - Allows us to explicitly represent implicit type conversions, which have no direct ...

static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)

Create implicit parameter.

static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)

Returns a new integer literal with value 'V' and type 'type'.

Describes the capture of a variable or of this, or of a C++1y init-capture.

A C++ lambda expression, which produces a function object (of unspecified type) that can be invoked l...

std::string OMPHostIRFile

Name of the IR file that contains the result of the OpenMP target host code generation.

IdentifierInfo * getIdentifier() const

Get the identifier that names this declaration, if there is one.

StringRef getName() const

Get the name of identifier for this declaration as a StringRef.

This is a basic class for representing single OpenMP clause.

This is a basic class for representing single OpenMP executable directive.

OpenMPDirectiveKind getDirectiveKind() const

static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)

This represents clause 'lastprivate' in the '#pragma omp ...' directives.

This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....

This represents clause 'reduction' in the '#pragma omp ...' directives.

This represents '#pragma omp requires...' directive.

This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.

static ParmVarDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, StorageClass S, Expr *DefArg)

PointerType - C99 6.7.5.1 - Pointer Declarators.

A (possibly-)qualified type.

LangAS getAddressSpace() const

Return the address space of this type.

QualType getNonReferenceType() const

If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...

QualType getCanonicalType() const

A qualifier set is used to build a set of qualifiers.

const Type * strip(QualType type)

Collect any qualifiers on the given type and return an unqualified type.

QualType apply(const ASTContext &Context, QualType QT) const

Apply the collected qualifiers to the given type.

void addAddressSpace(LangAS space)

Represents a struct/union/class.

virtual void completeDefinition()

Note that the definition of this type is now complete.

Scope - A scope is a transient data structure that is used while parsing the program.

Encodes a location in the source.

RetTy Visit(PTR(Stmt) S, ParamTys... P)

Stmt - This represents one statement.

void startDefinition()

Starts the definition of this tag declaration.

unsigned getNewAlign() const

Return the largest alignment for which a suitably-sized allocation with '::operator new(size_t)' is g...

TargetOptions & getTargetOpts() const

Retrieve the target options.

virtual const llvm::omp::GV & getGridValue() const

virtual bool hasFeature(StringRef Feature) const

Determine whether the given target has the given feature.

llvm::StringMap< bool > FeatureMap

The map of which features have been enabled disabled based on the command line.

The base class of the type hierarchy.

CXXRecordDecl * getAsCXXRecordDecl() const

Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...

bool isIntegerType() const

isIntegerType() does not include complex integers (a GCC extension).

bool isReferenceType() const

QualType getPointeeType() const

If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.

bool isLValueReferenceType() const

bool hasSignedIntegerRepresentation() const

Determine whether this type has an signed integer representation of some sort, e.g....

bool isVariablyModifiedType() const

Whether this type is a variably-modified type (C99 6.7.5).

UnaryOperator - This represents the unary-expression's (except sizeof and alignof),...

Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

Represents a variable declaration or definition.

VarDecl * getCanonicalDecl() override

Retrieves the "canonical" declaration of the given declaration.

bool isInitCapture() const

Whether this variable is the implicit variable for a lambda init-capture.

specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...

@ Type

The l-value was considered opaque, so the alignment was determined from a type.

@ Decl

The l-value was an access to a declared entity or something equivalently strong, like the address of ...

The JSON file list parser is used to communicate input to InstallAPI.

llvm::omp::Directive OpenMPDirectiveKind

OpenMP directives.

@ ICIS_NoInit

No in-class initializer.

bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a distribute directive.

@ LCK_ByRef

Capturing by reference.

@ Private

'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...

bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a parallel-kind directive.

bool isOpenMPPrivate(OpenMPClauseKind Kind)

Checks if the specified clause is one of private clauses like 'private', 'firstprivate',...

OpenMPDistScheduleClauseKind

OpenMP attributes for 'dist_schedule' clause.

bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a target code offload directive.

bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)

Checks if the specified directive is a teams-kind directive.

OffloadArch StringToOffloadArch(llvm::StringRef S)

bool isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind)

Checks if the specified directive kind is one of the composite or combined directives that need loop ...

LangAS

Defines the address space values used by the address space qualifier of QualType.

const char * OffloadArchToString(OffloadArch A)

void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)

Return the captured regions of an OpenMP directive.

LangAS getLangASFromTargetAS(unsigned TargetAS)

@ CXXThis

Parameter for C++ 'this' argument.

@ Other

Other implicit parameter.

OpenMPScheduleClauseKind

OpenMP attributes for 'schedule' clause.

llvm::PointerType * VoidPtrTy

llvm::IntegerType * SizeTy

llvm::PointerType * VoidPtrPtrTy

llvm::IntegerType * Int32Ty

llvm::PointerType * Int8PtrTy


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4