1 //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a set of utility VPlan to VPlan transformations. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlanTransforms.h" 15 #include "llvm/ADT/PostOrderIterator.h" 16 #include "llvm/ADT/SetVector.h" 17 #include "llvm/Analysis/IVDescriptors.h" 18 19 using namespace llvm; 20 21 void VPlanTransforms::VPInstructionsToVPRecipes( 22 Loop *OrigLoop, VPlanPtr &Plan, 23 function_ref<const InductionDescriptor *(PHINode *)> 24 GetIntOrFpInductionDescriptor, 25 SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE) { 26 27 auto *TopRegion = cast<VPRegionBlock>(Plan->getEntry()); 28 ReversePostOrderTraversal<VPBlockBase *> RPOT(TopRegion->getEntry()); 29 30 for (VPBlockBase *Base : RPOT) { 31 // Do not widen instructions in pre-header and exit blocks. 32 if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) 33 continue; 34 35 VPBasicBlock *VPBB = Base->getEntryBasicBlock(); 36 // Introduce each ingredient into VPlan. 37 for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) { 38 VPValue *VPV = Ingredient.getVPSingleValue(); 39 Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue()); 40 if (DeadInstructions.count(Inst)) { 41 VPValue DummyValue; 42 VPV->replaceAllUsesWith(&DummyValue); 43 Ingredient.eraseFromParent(); 44 continue; 45 } 46 47 VPRecipeBase *NewRecipe = nullptr; 48 if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) { 49 auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue()); 50 if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) { 51 VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); 52 VPValue *Step = 53 vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); 54 NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, 55 false, true); 56 } else { 57 Plan->addVPValue(Phi, VPPhi); 58 continue; 59 } 60 } else { 61 assert(isa<VPInstruction>(&Ingredient) && 62 "only VPInstructions expected here"); 63 assert(!isa<PHINode>(Inst) && "phis should be handled above"); 64 // Create VPWidenMemoryInstructionRecipe for loads and stores. 65 if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { 66 NewRecipe = new VPWidenMemoryInstructionRecipe( 67 *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 68 nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); 69 } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { 70 NewRecipe = new VPWidenMemoryInstructionRecipe( 71 *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 72 Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, 73 false /*Consecutive*/, false /*Reverse*/); 74 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { 75 NewRecipe = new VPWidenGEPRecipe( 76 GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); 77 } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) { 78 NewRecipe = 79 new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args())); 80 } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) { 81 bool InvariantCond = 82 SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); 83 NewRecipe = new VPWidenSelectRecipe( 84 *SI, Plan->mapToVPValues(SI->operands()), InvariantCond); 85 } else { 86 NewRecipe = 87 new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands())); 88 } 89 } 90 91 NewRecipe->insertBefore(&Ingredient); 92 if (NewRecipe->getNumDefinedValues() == 1) 93 VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue()); 94 else 95 assert(NewRecipe->getNumDefinedValues() == 0 && 96 "Only recpies with zero or one defined values expected"); 97 Ingredient.eraseFromParent(); 98 Plan->removeVPValueFor(Inst); 99 for (auto *Def : NewRecipe->definedValues()) { 100 Plan->addVPValue(Inst, Def); 101 } 102 } 103 } 104 } 105 106 bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) { 107 auto Iter = depth_first( 108 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())); 109 bool Changed = false; 110 // First, collect the operands of all predicated replicate recipes as seeds 111 // for sinking. 112 SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList; 113 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { 114 for (auto &Recipe : *VPBB) { 115 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe); 116 if (!RepR || !RepR->isPredicated()) 117 continue; 118 for (VPValue *Op : RepR->operands()) 119 WorkList.insert(std::make_pair(RepR->getParent(), Op)); 120 } 121 } 122 123 // Try to sink each replicate recipe in the worklist. 124 while (!WorkList.empty()) { 125 VPBasicBlock *SinkTo; 126 VPValue *C; 127 std::tie(SinkTo, C) = WorkList.pop_back_val(); 128 auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def); 129 if (!SinkCandidate || SinkCandidate->isUniform() || 130 SinkCandidate->getParent() == SinkTo || 131 SinkCandidate->mayHaveSideEffects() || 132 SinkCandidate->mayReadOrWriteMemory()) 133 continue; 134 135 bool NeedsDuplicating = false; 136 // All recipe users of the sink candidate must be in the same block SinkTo 137 // or all users outside of SinkTo must be uniform-after-vectorization ( 138 // i.e., only first lane is used) . In the latter case, we need to duplicate 139 // SinkCandidate. At the moment, we identify such UAV's by looking for the 140 // address operands of widened memory recipes. 141 auto CanSinkWithUser = [SinkTo, &NeedsDuplicating, 142 SinkCandidate](VPUser *U) { 143 auto *UI = dyn_cast<VPRecipeBase>(U); 144 if (!UI) 145 return false; 146 if (UI->getParent() == SinkTo) 147 return true; 148 auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI); 149 if (WidenI && WidenI->getAddr() == SinkCandidate) { 150 NeedsDuplicating = true; 151 return true; 152 } 153 return false; 154 }; 155 if (!all_of(SinkCandidate->users(), CanSinkWithUser)) 156 continue; 157 158 if (NeedsDuplicating) { 159 Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue()); 160 auto *Clone = 161 new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); 162 // TODO: add ".cloned" suffix to name of Clone's VPValue. 163 164 Clone->insertBefore(SinkCandidate); 165 SmallVector<VPUser *, 4> Users(SinkCandidate->users()); 166 for (auto *U : Users) { 167 auto *UI = cast<VPRecipeBase>(U); 168 if (UI->getParent() == SinkTo) 169 continue; 170 171 for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) { 172 if (UI->getOperand(Idx) != SinkCandidate) 173 continue; 174 UI->setOperand(Idx, Clone); 175 } 176 } 177 } 178 SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi()); 179 for (VPValue *Op : SinkCandidate->operands()) 180 WorkList.insert(std::make_pair(SinkTo, Op)); 181 Changed = true; 182 } 183 return Changed; 184 } 185 186 /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return 187 /// the mask. 188 VPValue *getPredicatedMask(VPRegionBlock *R) { 189 auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry()); 190 if (!EntryBB || EntryBB->size() != 1 || 191 !isa<VPBranchOnMaskRecipe>(EntryBB->begin())) 192 return nullptr; 193 194 return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0); 195 } 196 197 /// If \p R is a triangle region, return the 'then' block of the triangle. 198 static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) { 199 auto *EntryBB = cast<VPBasicBlock>(R->getEntry()); 200 if (EntryBB->getNumSuccessors() != 2) 201 return nullptr; 202 203 auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]); 204 auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]); 205 if (!Succ0 || !Succ1) 206 return nullptr; 207 208 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1) 209 return nullptr; 210 if (Succ0->getSingleSuccessor() == Succ1) 211 return Succ0; 212 if (Succ1->getSingleSuccessor() == Succ0) 213 return Succ1; 214 return nullptr; 215 } 216 217 bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { 218 SetVector<VPRegionBlock *> DeletedRegions; 219 bool Changed = false; 220 221 // Collect region blocks to process up-front, to avoid iterator invalidation 222 // issues while merging regions. 223 SmallVector<VPRegionBlock *, 8> CandidateRegions( 224 VPBlockUtils::blocksOnly<VPRegionBlock>(depth_first( 225 VPBlockRecursiveTraversalWrapper<VPBlockBase *>(Plan.getEntry())))); 226 227 // Check if Base is a predicated triangle, followed by an empty block, 228 // followed by another predicate triangle. If that's the case, move the 229 // recipes from the first to the second triangle. 230 for (VPRegionBlock *Region1 : CandidateRegions) { 231 if (DeletedRegions.contains(Region1)) 232 continue; 233 auto *MiddleBasicBlock = 234 dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor()); 235 if (!MiddleBasicBlock || !MiddleBasicBlock->empty()) 236 continue; 237 238 auto *Region2 = 239 dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor()); 240 if (!Region2) 241 continue; 242 243 VPValue *Mask1 = getPredicatedMask(Region1); 244 VPValue *Mask2 = getPredicatedMask(Region2); 245 if (!Mask1 || Mask1 != Mask2) 246 continue; 247 VPBasicBlock *Then1 = getPredicatedThenBlock(Region1); 248 VPBasicBlock *Then2 = getPredicatedThenBlock(Region2); 249 if (!Then1 || !Then2) 250 continue; 251 252 assert(Mask1 && Mask2 && "both region must have conditions"); 253 254 // Note: No fusion-preventing memory dependencies are expected in either 255 // region. Such dependencies should be rejected during earlier dependence 256 // checks, which guarantee accesses can be re-ordered for vectorization. 257 // 258 // Move recipes to the successor region. 259 for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1))) 260 ToMove.moveBefore(*Then2, Then2->getFirstNonPhi()); 261 262 auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor()); 263 auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor()); 264 265 // Move VPPredInstPHIRecipes from the merge block to the successor region's 266 // merge block. Update all users inside the successor region to use the 267 // original values. 268 for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { 269 VPValue *PredInst1 = 270 cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); 271 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); 272 SmallVector<VPUser *> Users(Phi1ToMoveV->users()); 273 for (VPUser *U : Users) { 274 auto *UI = dyn_cast<VPRecipeBase>(U); 275 if (!UI || UI->getParent() != Then2) 276 continue; 277 for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { 278 if (Phi1ToMoveV != U->getOperand(I)) 279 continue; 280 U->setOperand(I, PredInst1); 281 } 282 } 283 284 Phi1ToMove.moveBefore(*Merge2, Merge2->begin()); 285 } 286 287 // Finally, remove the first region. 288 for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) { 289 VPBlockUtils::disconnectBlocks(Pred, Region1); 290 VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock); 291 } 292 VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock); 293 DeletedRegions.insert(Region1); 294 } 295 296 for (VPRegionBlock *ToDelete : DeletedRegions) 297 delete ToDelete; 298 return Changed; 299 } 300 301 void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) { 302 for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 303 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 304 if (!IV || IV->getTruncInst()) 305 continue; 306 307 // A sequence of IR Casts has potentially been recorded for IV, which 308 // *must be bypassed* when the IV is vectorized, because the vectorized IV 309 // will produce the desired casted value. This sequence forms a def-use 310 // chain and is provided in reverse order, ending with the cast that uses 311 // the IV phi. Search for the recipe of the last cast in the chain and 312 // replace it with the original IV. Note that only the final cast is 313 // expected to have users outside the cast-chain and the dead casts left 314 // over will be cleaned up later. 315 auto &Casts = IV->getInductionDescriptor().getCastInsts(); 316 VPValue *FindMyCast = IV; 317 for (Instruction *IRCast : reverse(Casts)) { 318 VPRecipeBase *FoundUserCast = nullptr; 319 for (auto *U : FindMyCast->users()) { 320 auto *UserCast = cast<VPRecipeBase>(U); 321 if (UserCast->getNumDefinedValues() == 1 && 322 UserCast->getVPSingleValue()->getUnderlyingValue() == IRCast) { 323 FoundUserCast = UserCast; 324 break; 325 } 326 } 327 FindMyCast = FoundUserCast->getVPSingleValue(); 328 } 329 FindMyCast->replaceAllUsesWith(IV); 330 } 331 } 332 333 void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) { 334 VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); 335 VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; 336 for (VPUser *U : CanonicalIV->users()) { 337 WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); 338 if (WidenNewIV) 339 break; 340 } 341 342 if (!WidenNewIV) 343 return; 344 345 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 346 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 347 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 348 349 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() || 350 WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType()) 351 continue; 352 353 // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides 354 // everything WidenNewIV's users need. That is, WidenOriginalIV will 355 // generate a vector phi or all users of WidenNewIV demand the first lane 356 // only. 357 if (WidenOriginalIV->needsVectorIV() || 358 vputils::onlyFirstLaneUsed(WidenNewIV)) { 359 WidenNewIV->replaceAllUsesWith(WidenOriginalIV); 360 WidenNewIV->eraseFromParent(); 361 return; 362 } 363 } 364 } 365 366 // Check for live-out users currently not modeled in VPlan. 367 // Note that exit values of inductions are generated independent of 368 // the recipe. This means VPWidenIntOrFpInductionRecipe & 369 // VPScalarIVStepsRecipe can be removed, independent of uses outside 370 // the loop. 371 // TODO: Remove once live-outs are modeled in VPlan. 372 static bool hasOutsideUser(Instruction &I, Loop &OrigLoop) { 373 return any_of(I.users(), [&OrigLoop](User *U) { 374 if (!OrigLoop.contains(cast<Instruction>(U))) 375 return true; 376 377 // Look through single-value phis in the loop, as they won't be modeled in 378 // VPlan and may be used outside the loop. 379 if (auto *PN = dyn_cast<PHINode>(U)) 380 if (PN->getNumIncomingValues() == 1) 381 return hasOutsideUser(*PN, OrigLoop); 382 383 return false; 384 }); 385 } 386 387 void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) { 388 VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 389 // Check if \p R is used outside the loop, if required. 390 // TODO: Remove once live-outs are modeled in VPlan. 391 auto HasUsersOutsideLoop = [&OrigLoop](VPRecipeBase &R) { 392 // Exit values for induction recipes are generated independent of the 393 // recipes, expect for truncated inductions. Hence there is no need to check 394 // for users outside the loop for them. 395 if (isa<VPScalarIVStepsRecipe>(&R) || 396 (isa<VPWidenIntOrFpInductionRecipe>(&R) && 397 !isa<TruncInst>(R.getUnderlyingInstr()))) 398 return false; 399 return R.getUnderlyingInstr() && 400 hasOutsideUser(*R.getUnderlyingInstr(), OrigLoop); 401 }; 402 // Remove dead recipes in header block. The recipes in the block are processed 403 // in reverse order, to catch chains of dead recipes. 404 // TODO: Remove dead recipes across whole plan. 405 for (VPRecipeBase &R : make_early_inc_range(reverse(*Header))) { 406 if (R.mayHaveSideEffects() || 407 any_of(R.definedValues(), 408 [](VPValue *V) { return V->getNumUsers() > 0; }) || 409 HasUsersOutsideLoop(R)) 410 continue; 411 R.eraseFromParent(); 412 } 413 } 414 415 void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { 416 SmallVector<VPRecipeBase *> ToRemove; 417 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); 418 for (VPRecipeBase &Phi : HeaderVPBB->phis()) { 419 auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); 420 if (!IV || !IV->needsScalarIV()) 421 continue; 422 423 const InductionDescriptor &ID = IV->getInductionDescriptor(); 424 VPValue *Step = 425 vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE); 426 Instruction *TruncI = IV->getTruncInst(); 427 VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( 428 IV->getPHINode()->getType(), ID, Plan.getCanonicalIV(), 429 IV->getStartValue(), Step, TruncI ? TruncI->getType() : nullptr); 430 HeaderVPBB->insert(Steps, HeaderVPBB->getFirstNonPhi()); 431 432 // If there are no vector users of IV, simply update all users to use Step 433 // instead. 434 if (!IV->needsVectorIV()) { 435 IV->replaceAllUsesWith(Steps); 436 continue; 437 } 438 439 // Otherwise only update scalar users of IV to use Step instead. Use 440 // SetVector to ensure the list of users doesn't contain duplicates. 441 SetVector<VPUser *> Users(IV->user_begin(), IV->user_end()); 442 for (VPUser *U : Users) { 443 if (!U->usesScalars(IV)) 444 continue; 445 for (unsigned I = 0, E = U->getNumOperands(); I != E; I++) { 446 if (U->getOperand(I) != IV) 447 continue; 448 U->setOperand(I, Steps); 449 } 450 } 451 } 452 } 453 454 void VPlanTransforms::removeRedundantExpandSCEVRecipes(VPlan &Plan) { 455 DenseMap<const SCEV *, VPValue *> SCEV2VPV; 456 457 for (VPRecipeBase &R : 458 make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) { 459 auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R); 460 if (!ExpR) 461 continue; 462 463 auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR}); 464 if (I.second) 465 continue; 466 ExpR->replaceAllUsesWith(I.first->second); 467 ExpR->eraseFromParent(); 468 } 469 } 470