A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/llvm/llvm-project/commit/d517976758c8674bdcd4c74457f7a83f20e432c5 below:

Add utility class for checking dependency among accesses · llvm/llvm-project@d517976 · GitHub

@@ -3084,6 +3084,385 @@ void AccessAnalysis::processMemAccesses(bool UseDeferred) {

3084 3084

}

3085 3085

}

3086 3086 3087 +

/// \brief Checks memory dependences among accesses to the same underlying
/// object to determine whether vectorization is legal or not (and at
/// which vectorization factor).
///
/// This class works under the assumption that we already checked that memory
/// locations with different underlying pointers are "must-not alias".
/// We use the ScalarEvolution framework to symbolically evaluate access
/// functions pairs. Since we currently don't restructure the loop we can rely
/// on the program order of memory accesses to determine their safety.
/// At the moment we will only deem accesses as safe for:
///  * A negative constant distance assuming program order.
///
///      Safe: tmp = a[i + 1];      OR     a[i + 1] = x;
///            a[i] = tmp;                 y = a[i];
///
///   The latter case is safe because later checks guarantee that there can't
///   be a cycle through a phi node (that is, we check that "x" and "y" is not
///   the same variable: a header phi can only be an induction or a reduction, a
///   reduction can't have a memory sink, an induction can't have a memory
///   source). This is important and must not be violated (or we have to
///   resort to checking for cycles through memory).
///
///  * A positive constant distance assuming program order that is bigger
///    than the biggest memory access.
///
///     tmp = a[i]       OR              b[i] = x
///     a[i+2] = tmp                     y = b[i+2];
///
///     Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
///
///  * Zero distances and all accesses have the same size.
///
class MemoryDepChecker {
public:
  /// An access location paired with an access-kind flag (true = write).
  // NOTE(review): the flag is declared `char` but callers pass bool via
  // std::make_pair — relies on implicit bool->char conversion.
  typedef std::pair<Value*, char> MemAccessInfo;

  /// Construct a checker for the given innermost loop. Program-order
  /// numbering of accesses starts at zero.
  MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
      SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}

  /// \brief Register the location (instructions are given increasing numbers)
  /// of a write access.
  void addAccess(StoreInst *SI) {
    Value *Ptr = SI->getPointerOperand();
    // Record this store's program-order index under (pointer, write=true).
    Accesses[std::make_pair(Ptr, true)].push_back(AccessIdx);
    InstMap.push_back(SI);
    ++AccessIdx;
  }

  /// \brief Register the location (instructions are given increasing numbers)
  /// of a read access.
  void addAccess(LoadInst *LI) {
    Value *Ptr = LI->getPointerOperand();
    // Record this load's program-order index under (pointer, write=false).
    Accesses[std::make_pair(Ptr, false)].push_back(AccessIdx);
    InstMap.push_back(LI);
    ++AccessIdx;
  }

  /// \brief Check whether the dependencies between the accesses are safe.
  ///
  /// Only checks sets with elements in \p CheckDeps.
  bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
                   DenseSet<MemAccessInfo> &CheckDeps);

  /// \brief The maximum number of bytes of a vector register we can vectorize
  /// the accesses safely with.
  unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }

private:
  ScalarEvolution *SE;
  DataLayout *DL;
  const Loop *InnermostLoop;

  /// \brief Maps access locations (ptr, read/write) to program order.
  DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;

  /// \brief Memory access instructions in program order.
  SmallVector<Instruction *, 16> InstMap;

  /// \brief The program order index to be used for the next instruction.
  unsigned AccessIdx;

  // We can access this many bytes in parallel safely.
  // NOTE(review): only initialized by areDepsSafe() (to -1U), not by the
  // constructor — getMaxSafeDepDistBytes() is meaningless before that call.
  unsigned MaxSafeDepDistBytes;

  /// \brief Check whether there is a plausible dependence between the two
  /// accesses.
  ///
  /// Access \p A must happen before \p B in program order. The two indices
  /// identify the index into the program order map.
  ///
  /// This function checks whether there is a plausible dependence (or the
  /// absence of such can't be proved) between the two accesses. If there is a
  /// plausible dependence but the dependence distance is bigger than one
  /// element access it records this distance in \p MaxSafeDepDistBytes (if this
  /// distance is smaller than any other distance encountered so far).
  /// Otherwise, this function returns true signaling a possible dependence.
  bool isDependent(const MemAccessInfo &A, unsigned AIdx,
                   const MemAccessInfo &B, unsigned BIdx);

  /// \brief Check whether the data dependence could prevent store-load
  /// forwarding.
  bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
};

3190 + 3191 +

/// Return true if \p Ptr is a getelementptr instruction carrying the
/// "inbounds" flag; any other value (including GEP constant expressions)
/// yields false.
static bool isInBoundsGep(Value *Ptr) {
  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
  return Gep ? Gep->isInBounds() : false;
}

3196 + 3197 +

/// \brief Check whether the access through \p Ptr has a constant stride.

3198 +

static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,

3199 +

const Loop *Lp) {

3200 +

const Type *PtrTy = Ptr->getType();

3201 +

assert(PtrTy->isPointerTy() && "Unexpected non ptr");

3202 + 3203 +

// Make sure that the pointer does not point to aggregate types.

3204 +

if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType()) {

3205 +

DEBUG(dbgs() << "LV: Bad stride - Not a pointer to a scalar type" << *Ptr

3206 +

<< "\n");

3207 +

return 0;

3208 +

}

3209 + 3210 +

const SCEV *PtrScev = SE->getSCEV(Ptr);

3211 +

const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

3212 +

if (!AR) {

3213 +

DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "

3214 +

<< *Ptr << " SCEV: " << *PtrScev << "\n");

3215 +

return 0;

3216 +

}

3217 + 3218 +

// The accesss function must stride over the innermost loop.

3219 +

if (Lp != AR->getLoop()) {

3220 +

DEBUG(dbgs() << "LV: Bad stride - Not striding over innermost loop " << *Ptr

3221 +

<< " SCEV: " << *PtrScev << "\n");

3222 +

}

3223 + 3224 +

// The address calculation must not wrap. Otherwise, a dependence could be

3225 +

// inverted. An inbounds getelementptr that is a AddRec with a unit stride

3226 +

// cannot wrap per definition. The unit stride requirement is checked later.

3227 +

bool IsInBoundsGEP = isInBoundsGep(Ptr);

3228 +

bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);

3229 +

if (!IsNoWrapAddRec && !IsInBoundsGEP) {

3230 +

DEBUG(dbgs() << "LV: Bad stride - Pointer may wrap in the address space "

3231 +

<< *Ptr << " SCEV: " << *PtrScev << "\n");

3232 +

return 0;

3233 +

}

3234 + 3235 +

// Check the step is constant.

3236 +

const SCEV *Step = AR->getStepRecurrence(*SE);

3237 + 3238 +

// Calculate the pointer stride and check if it is consecutive.

3239 +

const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);

3240 +

if (!C) {

3241 +

DEBUG(dbgs() << "LV: Bad stride - Not a constant strided " << *Ptr <<

3242 +

" SCEV: " << *PtrScev << "\n");

3243 +

return 0;

3244 +

}

3245 + 3246 +

int64_t Size = DL->getTypeAllocSize(PtrTy->getPointerElementType());

3247 +

const APInt &APStepVal = C->getValue()->getValue();

3248 + 3249 +

// Huge step value - give up.

3250 +

if (APStepVal.getBitWidth() > 64)

3251 +

return 0;

3252 + 3253 +

int64_t StepVal = APStepVal.getSExtValue();

3254 + 3255 +

// Strided access.

3256 +

int64_t Stride = StepVal / Size;

3257 +

int64_t Rem = StepVal % Size;

3258 +

if (Rem)

3259 +

return 0;

3260 + 3261 +

// If the SCEV could wrap but we have an inbounds gep with a unit stride we

3262 +

// know we can't "wrap around the address space".

3263 +

if (!IsNoWrapAddRec && IsInBoundsGEP && Stride != 1 && Stride != -1)

3264 +

return 0;

3265 + 3266 +

return Stride;

3267 +

}

3268 + 3269 +

/// Return true if a dependence at \p Distance bytes could defeat store-load
/// forwarding for every feasible vector factor; otherwise possibly shrink
/// MaxSafeDepDistBytes so only forwarding-friendly factors remain.
bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
                                                    unsigned TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
  const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
  // Maximum vector factor.
  unsigned MaxVFWithoutSLForwardIssues = MaxVectorWidth*TypeByteSize;
  // Never consider factors beyond the already-established safe distance.
  if(MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
    MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;

  // Walk candidate vector widths (in bytes, powers of two). The first width
  // that is both misaligned with Distance and close enough to matter caps
  // the usable factor at the previous power of two.
  for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
       vf *= 2) {
    if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
      // Note: (vf >>= 1) both halves vf and stores the halved value.
      MaxVFWithoutSLForwardIssues = (vf >>=1);
      break;
    }
  }

  // Even the smallest vector factor (2 elements) has a forwarding conflict.
  if (MaxVFWithoutSLForwardIssues< 2*TypeByteSize) {
    DEBUG(dbgs() << "LV: Distance " << Distance <<
          " that could cause a store-load forwarding conflict\n");
    return true;
  }

  // Tighten the global cap only if the loop above actually lowered it below
  // the theoretical maximum.
  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues != MaxVectorWidth*TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

3305 + 3306 +

bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,

3307 +

const MemAccessInfo &B, unsigned BIdx) {

3308 +

assert (AIdx < BIdx && "Must pass arguments in program order");

3309 + 3310 +

Value *APtr = A.first;

3311 +

Value *BPtr = B.first;

3312 +

bool AIsWrite = A.second;

3313 +

bool BIsWrite = B.second;

3314 + 3315 +

// Two reads are independent.

3316 +

if (!AIsWrite && !BIsWrite)

3317 +

return false;

3318 + 3319 +

const SCEV *AScev = SE->getSCEV(APtr);

3320 +

const SCEV *BScev = SE->getSCEV(BPtr);

3321 + 3322 +

int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop);

3323 +

int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop);

3324 + 3325 +

const SCEV *Src = AScev;

3326 +

const SCEV *Sink = BScev;

3327 + 3328 +

// If the induction step is negative we have to invert source and sink of the

3329 +

// dependence.

3330 +

if (StrideAPtr < 0) {

3331 +

//Src = BScev;

3332 +

//Sink = AScev;

3333 +

std::swap(APtr, BPtr);

3334 +

std::swap(Src, Sink);

3335 +

std::swap(AIsWrite, BIsWrite);

3336 +

std::swap(AIdx, BIdx);

3337 +

std::swap(StrideAPtr, StrideBPtr);

3338 +

}

3339 + 3340 +

const SCEV *Dist = SE->getMinusSCEV(Sink, Src);

3341 + 3342 +

DEBUG(dbgs() << "LV: Src Scev: " << *Src << "Sink Scev: " << *Sink

3343 +

<< "(Induction step: " << StrideAPtr << ")\n");

3344 +

DEBUG(dbgs() << "LV: Distance for " << *InstMap[AIdx] << " to "

3345 +

<< *InstMap[BIdx] << ": " << *Dist << "\n");

3346 + 3347 +

// Need consecutive accesses. We don't want to vectorize

3348 +

// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in

3349 +

// the address space.

3350 +

if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){

3351 +

DEBUG(dbgs() << "Non-consecutive pointer access\n");

3352 +

return true;

3353 +

}

3354 + 3355 +

const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);

3356 +

if (!C) {

3357 +

DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");

3358 +

return true;

3359 +

}

3360 + 3361 +

Type *ATy = APtr->getType()->getPointerElementType();

3362 +

Type *BTy = BPtr->getType()->getPointerElementType();

3363 +

unsigned TypeByteSize = DL->getTypeAllocSize(ATy);

3364 + 3365 +

// Negative distances are not plausible dependencies.

3366 +

const APInt &Val = C->getValue()->getValue();

3367 +

if (Val.isNegative()) {

3368 +

bool IsTrueDataDependence = (AIsWrite && !BIsWrite);

3369 +

if (IsTrueDataDependence &&

3370 +

(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||

3371 +

ATy != BTy))

3372 +

return true;

3373 + 3374 +

DEBUG(dbgs() << "LV: Dependence is negative: NoDep\n");

3375 +

return false;

3376 +

}

3377 + 3378 +

// Write to the same location with the same size.

3379 +

// Could be improved to assert type sizes are the same (i32 == float, etc).

3380 +

if (Val == 0) {

3381 +

if (ATy == BTy)

3382 +

return false;

3383 +

DEBUG(dbgs() << "LV: Zero dependence difference but different types");

3384 +

return true;

3385 +

}

3386 + 3387 +

assert(Val.isStrictlyPositive() && "Expect a positive value");

3388 + 3389 +

// Positive distance bigger than max vectorization factor.

3390 +

if (ATy != BTy) {

3391 +

DEBUG(dbgs() <<

3392 +

"LV: ReadWrite-Write positive dependency with different types");

3393 +

return false;

3394 +

}

3395 + 3396 +

unsigned Distance = (unsigned) Val.getZExtValue();

3397 + 3398 +

// Bail out early if passed-in parameters make vectorization not feasible.

3399 +

unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;

3400 +

unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;

3401 + 3402 +

// The distance must be bigger than the size needed for a vectorized version

3403 +

// of the operation and the size of the vectorized operation must not be

3404 +

// bigger than the currrent maximum size.

3405 +

if (Distance < 2*TypeByteSize ||

3406 +

2*TypeByteSize > MaxSafeDepDistBytes ||

3407 +

Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {

3408 +

DEBUG(dbgs() << "LV: Failure because of Positive distance "

3409 +

<< Val.getSExtValue() << "\n");

3410 +

return true;

3411 +

}

3412 + 3413 +

MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?

3414 +

Distance : MaxSafeDepDistBytes;

3415 + 3416 +

bool IsTrueDataDependence = (!AIsWrite && BIsWrite);

3417 +

if (IsTrueDataDependence &&

3418 +

couldPreventStoreLoadForward(Distance, TypeByteSize))

3419 +

return true;

3420 + 3421 +

DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<

3422 +

" with max VF=" << MaxSafeDepDistBytes/TypeByteSize << "\n");

3423 + 3424 +

return false;

3425 +

}

3426 + 3427 +

/// Return true if no unsafe dependence exists among the accesses whose
/// equivalence sets intersect \p CheckDeps; resets MaxSafeDepDistBytes.
bool
MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
                              DenseSet<MemAccessInfo> &CheckDeps) {

  // Start with "unbounded" and let isDependent() tighten it.
  MaxSafeDepDistBytes = -1U;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
      AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
    AI = AccessSets.member_begin(I), AE = AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      // Erase from the worklist so the outer loop terminates; the set's
      // members are still fully pairwise-checked below.
      CheckDeps.erase(*AI);
      EquivalenceClasses<MemAccessInfo>::member_iterator OI = llvm::next(AI);
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
            // isDependent requires program order, so order the pair by index.
            if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2))
              return false;
            if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1))
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }
  // No pair showed a plausible unsafe dependence.
  return true;
}

3465 + 3087 3466

AliasAnalysis::Location

3088 3467

LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {

3089 3468

if (StoreInst *Store = dyn_cast<StoreInst>(Inst))


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4