1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a set of utility VPlan to VPlan transformations. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlanTransforms.h" 15 #include "llvm/ADT/PostOrderIterator.h" 16 #include "llvm/ADT/SetVector.h" 17 #include "llvm/Analysis/IVDescriptors.h" 18 19 using namespace llvm; 20 21 void VPlanTransforms::VPInstructionsToVPRecipes( 22 Loop *OrigLoop, VPlanPtr &Plan, 23 function_ref<const InductionDescriptor *(PHINode *)> 24 GetIntOrFpInductionDescriptor, 25 SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) { 26 27 ReversePostOrderTraversal<VPBlockRecursiveTraversalWrapper<VPBlockBase *>> 28 RPOT(Plan->getEntry()); 29 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { 30 // Introduce each ingredient into VPlan. 31 for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) { 32 VPValue *VPV = Ingredient.getVPSingleValue(); 33 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); 34 if (DeadInstructions.count(Inst)) { 35 VPValue DummyValue; 36 VPV->replaceAllUsesWith(&DummyValue); 37 Ingredient.eraseFromParent(); 38 continue; 39 } 40 41 VPRecipeBase *NewRecipe = nullptr; 42 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { 43 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); 44 if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { 45 VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); 46 VPValue *Step = 47 vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); 48 NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, 49 false, true); 50 } else { 51 Plan->addVPValue(Phi, VPPhi); 52 continue; 53 } 54 } else { 55 assert(isa<VPInstruction>(&Ingredient) && 56 "only VPInstructions expected here"); 57 assert(!isa<PHINode>(Inst) && "phis should be handled above"); 58 // Create VPWidenMemoryInstructionRecipe for loads and stores. 59 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 60 NewRecipe = new VPWidenMemoryInstructionRecipe( 61 *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 62 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); 63 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 64 NewRecipe = new VPWidenMemoryInstructionRecipe( 65 *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 66 Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, 67 false /*Consecutive*/, false /*Reverse*/); 68 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { 69 NewRecipe = new VPWidenGEPRecipe( 70 GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); 71 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { 72 NewRecipe = 73 new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args())); 74 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { 75 bool InvariantCond = 76 SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); 77 NewRecipe = new VPWidenSelectRecipe( 78 *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); 79 } else { 80 NewRecipe = 81 new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); 82 } 83 } 84 85 NewRecipe->insertBefore(&Ingredient); 86 if (NewRecipe->getNumDefinedValues() == 1) 87 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 88 else 89 assert(NewRecipe->getNumDefinedValues() == 0 && 90 "Only recpies with zero or one defined values expected"); 91 Ingredient.eraseFromParent(); 92 Plan->removeVPValueFor(Inst); 93 for (auto *Def : NewRecipe->definedValues()) { 94 Plan->addVPValue(Inst, Def); 95 } 96 } 97 } 98 } 99 100 bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { 101 auto Iter = depth_first( 102 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())); 103 bool Changed = false; 104 // First, collect the operands of all predicated replicate recipes as seeds 105 // for sinking. 106 SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList; 107 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { 108 for (auto &Recipe : *VPBB) { 109 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe); 110 if (!RepR || !RepR->isPredicated()) 111 continue; 112 for (VPValue *Op : RepR->operands()) 113 WorkList.insert(std::make_pair(RepR->getParent(), Op)); 114 } 115 } 116 117 // Try to sink each replicate recipe in the worklist. 118 while (!WorkList.empty()) { 119 VPBasicBlock *SinkTo; 120 VPValue *C; 121 std::tie(SinkTo, C) = WorkList.pop_back_val(); 122 auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def); 123 if (!SinkCandidate || SinkCandidate->isUniform() || 124 SinkCandidate->getParent() == SinkTo || 125 SinkCandidate->mayHaveSideEffects() || 126 SinkCandidate->mayReadOrWriteMemory()) 127 continue; 128 129 bool NeedsDuplicating = false; 130 // All recipe users of the sink candidate must be in the same block SinkTo 131 // or all users outside of SinkTo must be uniform-after-vectorization ( 132 // i.e., only first lane is used) . In the latter case, we need to duplicate 133 // SinkCandidate. At the moment, we identify such UAV's by looking for the 134 // address operands of widened memory recipes. 135 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, 136 SinkCandidate](VPUser *U) { 137 auto *UI = dyn_cast<VPRecipeBase>(U); 138 if (!UI) 139 return false; 140 if (UI->getParent() == SinkTo) 141 return true; 142 auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI); 143 if (WidenI && WidenI->getAddr() == SinkCandidate) { 144 NeedsDuplicating = true; 145 return true; 146 } 147 return false; 148 }; 149 if (!all_of(SinkCandidate->users(), CanSinkWithUser)) 150 continue; 151 152 if (NeedsDuplicating) { 153 Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue()); 154 auto *Clone = 155 new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); 156 // TODO: add ".cloned" suffix to name of Clone's VPValue. 157 158 Clone->insertBefore(SinkCandidate); 159 SmallVector<VPUser *, 4> Users(SinkCandidate->users()); 160 for (auto *U : Users) { 161 auto *UI = cast<VPRecipeBase>(U); 162 if (UI->getParent() == SinkTo) 163 continue; 164 165 for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { 166 if (UI->getOperand(Idx) != SinkCandidate) 167 continue; 168 UI->setOperand(Idx, Clone); 169 } 170 } 171 } 172 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); 173 for (VPValue *Op : SinkCandidate->operands()) 174 WorkList.insert(std::make_pair(SinkTo, Op)); 175 Changed = true; 176 } 177 return Changed; 178 } 179 180 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return 181 /// the mask. 182 VPValue *getPredicatedMask(VPRegionBlock *R) { 183 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); 184 if (!EntryBB || EntryBB->size() != 1 || 185 !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) 186 return nullptr; 187 188 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); 189 } 190 191 /// If \p R is a triangle region, return the 'then' block of the triangle. 192 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { 193 auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); 194 if (EntryBB->getNumSuccessors() != 2) 195 return nullptr; 196 197 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); 198 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); 199 if (!Succ0 || !Succ1) 200 return nullptr; 201 202 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) 203 return nullptr; 204 if (Succ0->getSingleSuccessor() == Succ1) 205 return Succ0; 206 if (Succ1->getSingleSuccessor() == Succ0) 207 return Succ1; 208 return nullptr; 209 } 210 211 bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { 212 SetVector<VPRegionBlock *> DeletedRegions; 213 bool Changed = false; 214 215 // Collect region blocks to process up-front, to avoid iterator invalidation 216 // issues while merging regions. 217 SmallVector<VPRegionBlock *, 8> CandidateRegions( 218 VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first( 219 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())))); 220 221 // Check if Base is a predicated triangle, followed by an empty block, 222 // followed by another predicate triangle. If that's the case, move the 223 // recipes from the first to the second triangle. 224 for (VPRegionBlock *Region1 : CandidateRegions) { 225 if (DeletedRegions.contains(Region1)) 226 continue; 227 auto *MiddleBasicBlock = 228 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); 229 if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) 230 continue; 231 232 auto *Region2 = 233 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 234 if (!Region2) 235 continue; 236 237 VPValue *Mask1 = getPredicatedMask(Region1); 238 VPValue *Mask2 = getPredicatedMask(Region2); 239 if (!Mask1 || Mask1 != Mask2) 240 continue; 241 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); 242 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); 243 if (!Then1 || !Then2) 244 continue; 245 246 assert(Mask1 && Mask2 && "both region must have conditions"); 247 248 // Note: No fusion-preventing memory dependencies are expected in either 249 // region. Such dependencies should be rejected during earlier dependence 250 // checks, which guarantee accesses can be re-ordered for vectorization. 251 // 252 // Move recipes to the successor region. 253 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) 254 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); 255 256 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); 257 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); 258 259 // Move VPPredInstPHIRecipes from the merge block to the successor region's 260 // merge block. Update all users inside the successor region to use the 261 // original values. 262 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { 263 VPValue *PredInst1 = 264 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); 265 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); 266 SmallVector<VPUser *> Users(Phi1ToMoveV->users()); 267 for (VPUser *U : Users) { 268 auto *UI = dyn_cast<VPRecipeBase>(U); 269 if (!UI || UI->getParent() != Then2) 270 continue; 271 for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { 272 if (Phi1ToMoveV != U->getOperand(I)) 273 continue; 274 U->setOperand(I, PredInst1); 275 } 276 } 277 278 Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); 279 } 280 281 // Finally, remove the first region. 282 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { 283 VPBlockUtils::disconnectBlocks(Pred, Region1); 284 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); 285 } 286 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); 287 DeletedRegions.insert(Region1); 288 } 289 290 for (VPRegionBlock *ToDelete : DeletedRegions) 291 delete ToDelete; 292 return Changed; 293 } 294 295 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { 296 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 297 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 298 if (!IV || IV->getTruncInst()) 299 continue; 300 301 // A sequence of IR Casts has potentially been recorded for IV, which 302 // *must be bypassed* when the IV is vectorized, because the vectorized IV 303 // will produce the desired casted value. This sequence forms a def-use 304 // chain and is provided in reverse order, ending with the cast that uses 305 // the IV phi. Search for the recipe of the last cast in the chain and 306 // replace it with the original IV. Note that only the final cast is 307 // expected to have users outside the cast-chain and the dead casts left 308 // over will be cleaned up later. 309 auto &Casts = IV->getInductionDescriptor().getCastInsts(); 310 VPValue *FindMyCast = IV; 311 for (Instruction *IRCast : reverse(Casts)) { 312 VPRecipeBase *FoundUserCast = nullptr; 313 for (auto *U : FindMyCast->users()) { 314 auto *UserCast = cast<VPRecipeBase>(U); 315 if (UserCast->getNumDefinedValues() == 1 && 316 UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { 317 FoundUserCast = UserCast; 318 break; 319 } 320 } 321 FindMyCast = FoundUserCast->getVPSingleValue(); 322 } 323 FindMyCast->replaceAllUsesWith(IV); 324 } 325 } 326 327 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { 328 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 329 VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; 330 for (VPUser *U : CanonicalIV->users()) { 331 WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); 332 if (WidenNewIV) 333 break; 334 } 335 336 if (!WidenNewIV) 337 return; 338 339 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 340 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 341 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 342 343 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || 344 WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) 345 continue; 346 347 // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides 348 // everything WidenNewIV's users need. That is, WidenOriginalIV will 349 // generate a vector phi or all users of WidenNewIV demand the first lane 350 // only. 351 if (WidenOriginalIV->needsVectorIV() || 352 vputils::onlyFirstLaneUsed(WidenNewIV)) { 353 WidenNewIV->replaceAllUsesWith(WidenOriginalIV); 354 WidenNewIV->eraseFromParent(); 355 return; 356 } 357 } 358 } 359 360 void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) { 361 VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 362 // Remove dead recipes in header block. The recipes in the block are processed 363 // in reverse order, to catch chains of dead recipes. 364 // TODO: Remove dead recipes across whole plan. 365 for (VPRecipeBase &R : make_early_inc_range(reverse(*Header))) { 366 if (R.mayHaveSideEffects() || any_of(R.definedValues(), [](VPValue *V) { 367 return V->getNumUsers() > 0; 368 })) 369 continue; 370 R.eraseFromParent(); 371 } 372 } 373 374 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { 375 SmallVector<VPRecipeBase *> ToRemove; 376 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 377 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 378 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 379 if (!IV || !IV->needsScalarIV()) 380 continue; 381 382 const InductionDescriptor &ID = IV->getInductionDescriptor(); 383 VPValue *Step = 384 vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); 385 Instruction *TruncI = IV->getTruncInst(); 386 VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( 387 IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(), 388 IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr); 389 HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi()); 390 391 // If there are no vector users of IV, simply update all users to use Step 392 // instead. 393 if (!IV->needsVectorIV()) { 394 IV->replaceAllUsesWith(Steps); 395 continue; 396 } 397 398 // Otherwise only update scalar users of IV to use Step instead. Use 399 // SetVector to ensure the list of users doesn't contain duplicates. 400 SetVector<VPUser *> Users(IV->user_begin(), IV->user_end()); 401 for (VPUser *U : Users) { 402 if (!U->usesScalars(IV)) 403 continue; 404 for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { 405 if (U->getOperand(I) != IV) 406 continue; 407 U->setOperand(I, Steps); 408 } 409 } 410 } 411 } 412 413 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { 414 DenseMap<const SCEV *, VPValue *> SCEV2VPV; 415 416 for (VPRecipeBase &R : 417 make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { 418 auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R); 419 if (!ExpR) 420 continue; 421 422 auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); 423 if (I.second) 424 continue; 425 ExpR->replaceAllUsesWith(I.first->second); 426 ExpR->eraseFromParent(); 427 } 428 } 429