//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
/// scalars for the scalar form of the intrinsic and all vectors for
/// the vector form of the intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::bswap:
  case Intrinsic::ctpop:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return true;
  default:
    return false;
  }
}

/// \brief Identifies if the intrinsic has a scalar operand. It checks for the
/// ctlz, cttz and powi intrinsics, whose second argument (operand index 1) is
/// a scalar.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx) {
  switch (ID) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::powi:
    return (ScalarOpdIdx == 1);
  default:
    return false;
  }
}

/// \brief Check whether a call has a unary floating-point signature.
/// It checks the following:
/// a) the call has a single argument,
/// b) the argument is of floating-point type,
/// c) the call's return type matches the argument type, and
/// d) the call only reads memory.
/// If all these conditions are met, return ValidIntrinsicID;
/// otherwise return not_intrinsic.
Intrinsic::ID llvm::checkUnaryFloatSignature(const CallInst &I,
                                             Intrinsic::ID ValidIntrinsicID) {
  if (I.getNumArgOperands() != 1 ||
      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
      I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  return ValidIntrinsicID;
}
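// For example (an illustrative IR fragment, not taken from the test suite):
// given the declaration 'declare float @sinf(float) readonly' and the call
// '%r = call float @sinf(float %x)', checkUnaryFloatSignature(*CI,
// Intrinsic::sin) returns Intrinsic::sin, because the single argument and the
// return type are the same floating-point type and the call only reads
// memory. Any mismatch yields Intrinsic::not_intrinsic.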
/// \brief Check whether a call has a binary floating-point signature.
/// It checks the following:
/// a) the call has two arguments,
/// b) both arguments are of floating-point type,
/// c) the call's return type matches both argument types, and
/// d) the call only reads memory.
/// If all these conditions are met, return ValidIntrinsicID;
/// otherwise return not_intrinsic.
Intrinsic::ID llvm::checkBinaryFloatSignature(const CallInst &I,
                                              Intrinsic::ID ValidIntrinsicID) {
  if (I.getNumArgOperands() != 2 ||
      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
      !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
      I.getType() != I.getArgOperand(0)->getType() ||
      I.getType() != I.getArgOperand(1)->getType() || !I.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  return ValidIntrinsicID;
}
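// Similarly (again an illustrative fragment): for
// 'declare double @fmax(double, double) readonly' and the call
// '%m = call double @fmax(double %a, double %b)',
// checkBinaryFloatSignature(*CI, Intrinsic::maxnum) returns
// Intrinsic::maxnum, since both arguments and the return type are double and
// the call only reads memory.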
/// \brief Returns the intrinsic ID for a call.
/// For the given call instruction it finds the matching intrinsic and returns
/// its ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
                                          const TargetLibraryInfo *TLI) {
  // If we have an intrinsic call, check if it is trivially vectorizable.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
        ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
      return ID;
    return Intrinsic::not_intrinsic;
  }

  if (!TLI)
    return Intrinsic::not_intrinsic;

  LibFunc::Func Func;
  Function *F = CI->getCalledFunction();
  // We are going to make assumptions about the semantics of the function, so
  // check that the target knows the function is available in this environment
  // and that it does not have local linkage.
  if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
    return Intrinsic::not_intrinsic;

  // Otherwise check if we have a call to a function that can be turned into a
  // vector intrinsic.
  switch (Func) {
  default:
    break;
  case LibFunc::sin:
  case LibFunc::sinf:
  case LibFunc::sinl:
    return checkUnaryFloatSignature(*CI, Intrinsic::sin);
  case LibFunc::cos:
  case LibFunc::cosf:
  case LibFunc::cosl:
    return checkUnaryFloatSignature(*CI, Intrinsic::cos);
  case LibFunc::exp:
  case LibFunc::expf:
  case LibFunc::expl:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp);
  case LibFunc::exp2:
  case LibFunc::exp2f:
  case LibFunc::exp2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
  case LibFunc::log:
  case LibFunc::logf:
  case LibFunc::logl:
    return checkUnaryFloatSignature(*CI, Intrinsic::log);
  case LibFunc::log10:
  case LibFunc::log10f:
  case LibFunc::log10l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log10);
  case LibFunc::log2:
  case LibFunc::log2f:
  case LibFunc::log2l:
    return checkUnaryFloatSignature(*CI, Intrinsic::log2);
  case LibFunc::fabs:
  case LibFunc::fabsf:
  case LibFunc::fabsl:
    return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
  case LibFunc::fmin:
  case LibFunc::fminf:
  case LibFunc::fminl:
    return checkBinaryFloatSignature(*CI, Intrinsic::minnum);
  case LibFunc::fmax:
  case LibFunc::fmaxf:
  case LibFunc::fmaxl:
    return checkBinaryFloatSignature(*CI, Intrinsic::maxnum);
  case LibFunc::copysign:
  case LibFunc::copysignf:
  case LibFunc::copysignl:
    return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
  case LibFunc::floor:
  case LibFunc::floorf:
  case LibFunc::floorl:
    return checkUnaryFloatSignature(*CI, Intrinsic::floor);
  case LibFunc::ceil:
  case LibFunc::ceilf:
  case LibFunc::ceill:
    return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
  case LibFunc::trunc:
  case LibFunc::truncf:
  case LibFunc::truncl:
    return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
  case LibFunc::rint:
  case LibFunc::rintf:
  case LibFunc::rintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::rint);
  case LibFunc::nearbyint:
  case LibFunc::nearbyintf:
  case LibFunc::nearbyintl:
    return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
  case LibFunc::round:
  case LibFunc::roundf:
  case LibFunc::roundl:
    return checkUnaryFloatSignature(*CI, Intrinsic::round);
  case LibFunc::pow:
  case LibFunc::powf:
  case LibFunc::powl:
    return checkBinaryFloatSignature(*CI, Intrinsic::pow);
  }

  return Intrinsic::not_intrinsic;
}

/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getModule()->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  unsigned GEPAllocSize = DL.getTypeAllocSize(
      cast<PointerType>(Gep->getType()->getScalarType())->getElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    std::advance(GEPTI, LastOperand - 1);

    // If it's a type with the same allocation size as the result of the GEP
    // we can peel off the zero index.
    if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

/// \brief If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the GEP indices are uniform except for our induction
  // operand.
  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
    if (i != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// \brief If a value has only one user that is a CastInst to type Ty, return
/// that cast; otherwise return nullptr.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
  Value *UniqueCast = nullptr;
  for (User *U : Ptr->users()) {
    CastInst *CI = dyn_cast<CastInst>(U);
    if (CI && CI->getType() == Ty) {
      if (!UniqueCast)
        UniqueCast = CI;
      else
        return nullptr;
    }
  }
  return UniqueCast;
}
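// As an illustration of the GEP helpers above (a hypothetical IR fragment,
// assuming %i is the loop induction variable and %base is loop-invariant):
//
//   %gep = getelementptr inbounds i32, i32* %base, i64 %i
//
// getGEPInductionOperand returns 1, the operand index of %i, and
// stripGetElementPtr returns %i itself, so subsequent analysis can reason
// about the index expression rather than the pointer.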
/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually the index
  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
  // analyzing the pointer, otherwise we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
      V = C->getOperand();

  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size-of-access multiplication if we are still analyzing the
  // pointer: the step of the recurrence is expected to be the symbolic stride
  // times the access size.
  if (OrigPtr == Ptr) {
    const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
    PtrAccessSize = DL.getTypeAllocSize(PtrTy->getElementType());
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal =
          cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Strip off casts.
  Type *StrippedOffRecurrenceCast = nullptr;
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
    StrippedOffRecurrenceCast = C->getType();
    V = C->getOperand();
  }

  // Look for the loop-invariant symbolic value.
  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
  if (!U)
    return nullptr;

  Value *Stride = U->getValue();
  if (!Lp->isLoopInvariant(Stride))
    return nullptr;

  // If we have stripped off the recurrence cast we have to make sure that we
  // return the value that is used in this loop so that we can replace it
  // later.
  if (StrippedOffRecurrenceCast)
    Stride = getUniqueCastUse(Stride, Lp, StrippedOffRecurrenceCast);

  return Stride;
}
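// For example (an illustrative C loop, not from the test suite): given
//
//   for (i = 0; i < n; i++)
//     a[i * stride] = 0;
//
// the pointer recurrence step is 'stride * sizeof(a[0])', so after the
// size-of-access multiplication is stripped, getStrideFromPointer returns the
// IR value of 'stride'. Callers such as the loop vectorizer can then emit a
// runtime 'stride == 1' check and version the loop for the unit-stride case.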
/// \brief Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it was inserted and then
/// extracted from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  unsigned Width = VTy->getNumElements();
  if (EltNo >= Width) // Out of range access.
    return UndefValue::get(VTy->getElementType());

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return UndefValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with a constant zero.
  Value *Val = nullptr;
  Constant *Con = nullptr;
  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
    if (Constant *Elt = Con->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // Otherwise, we don't know.
  return nullptr;
}

/// \brief Get the splat value if the input is a splat vector, or return
/// nullptr. This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a single value into a vector.
llvm::Value *llvm::getSplatValue(Value *V) {
  if (auto *CV = dyn_cast<ConstantDataVector>(V))
    return CV->getSplatValue();

  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
  if (!ShuffleInst)
    return nullptr;
  // All-zero (or undef) shuffle mask elements.
  for (int MaskElt : ShuffleInst->getShuffleMask())
    if (MaskElt != 0 && MaskElt != -1)
      return nullptr;
  // The first shuffle source is 'insertelement' with index 0.
  auto *InsertEltInst =
      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
    return nullptr;

  return InsertEltInst->getOperand(1);
}
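// For example (an illustrative IR fragment): the canonical broadcast idiom
//
//   %ins = insertelement <4 x float> undef, float %x, i32 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
//                          <4 x i32> zeroinitializer
//
// matches case (2) above, so getSplatValue(%splat) returns %x.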