//===- MVETailPredication.cpp - MVE Tail Predication ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead
/// branches to help accelerate DSP applications. These two extensions can be
/// combined to provide implicit vector predication within a low-overhead loop.
/// The HardwareLoops pass inserts intrinsics identifying loops that the
/// backend will attempt to convert into a low-overhead loop. The vectorizer is
/// responsible for generating a vectorized loop in which the lanes are
/// predicated upon the iteration counter. This pass looks at these predicated
/// vector loops, that are targets for low-overhead loops, and prepares them
/// for code generation. Once the vectorizer has produced a masked loop, there
/// are a couple of final forms:
/// - A tail-predicated loop, with implicit predication.
/// - A loop containing multiple VCTP instructions, predicating multiple VPT
///   blocks of instructions operating on different vector types.
///
/// This pass inserts the VCTP intrinsic to represent the effect of tail
/// predication. This will be picked up by the ARM Low-overhead loop pass,
/// which performs the final transformation to a DLSTP or WLSTP tail-predicated
/// loop.
28 29 #include "ARM.h" 30 #include "ARMSubtarget.h" 31 #include "llvm/Analysis/LoopInfo.h" 32 #include "llvm/Analysis/LoopPass.h" 33 #include "llvm/Analysis/ScalarEvolution.h" 34 #include "llvm/Analysis/ScalarEvolutionExpander.h" 35 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 36 #include "llvm/Analysis/TargetTransformInfo.h" 37 #include "llvm/CodeGen/TargetPassConfig.h" 38 #include "llvm/InitializePasses.h" 39 #include "llvm/IR/IRBuilder.h" 40 #include "llvm/IR/Instructions.h" 41 #include "llvm/IR/IntrinsicsARM.h" 42 #include "llvm/IR/PatternMatch.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 45 #include "llvm/Transforms/Utils/LoopUtils.h" 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "mve-tail-predication" 50 #define DESC "Transform predicated vector loops to use MVE tail predication" 51 52 cl::opt<bool> 53 DisableTailPredication("disable-mve-tail-predication", cl::Hidden, 54 cl::init(true), 55 cl::desc("Disable MVE Tail Predication")); 56 namespace { 57 58 class MVETailPredication : public LoopPass { 59 SmallVector<IntrinsicInst*, 4> MaskedInsts; 60 Loop *L = nullptr; 61 LoopInfo *LI = nullptr; 62 const DataLayout *DL; 63 DominatorTree *DT = nullptr; 64 ScalarEvolution *SE = nullptr; 65 TargetTransformInfo *TTI = nullptr; 66 TargetLibraryInfo *TLI = nullptr; 67 bool ClonedVCTPInExitBlock = false; 68 69 public: 70 static char ID; 71 72 MVETailPredication() : LoopPass(ID) { } 73 74 void getAnalysisUsage(AnalysisUsage &AU) const override { 75 AU.addRequired<ScalarEvolutionWrapperPass>(); 76 AU.addRequired<LoopInfoWrapperPass>(); 77 AU.addRequired<TargetPassConfig>(); 78 AU.addRequired<TargetTransformInfoWrapperPass>(); 79 AU.addRequired<DominatorTreeWrapperPass>(); 80 AU.addRequired<TargetLibraryInfoWrapperPass>(); 81 AU.addPreserved<LoopInfoWrapperPass>(); 82 AU.setPreservesCFG(); 83 } 84 85 bool runOnLoop(Loop *L, LPPassManager&) override; 86 87 private: 88 89 /// Perform the relevant checks on the 
loop and convert if possible. 90 bool TryConvert(Value *TripCount); 91 92 /// Return whether this is a vectorized loop, that contains masked 93 /// load/stores. 94 bool IsPredicatedVectorLoop(); 95 96 /// Compute a value for the total number of elements that the predicated 97 /// loop will process. 98 Value *ComputeElements(Value *TripCount, VectorType *VecTy); 99 100 /// Is the icmp that generates an i1 vector, based upon a loop counter 101 /// and a limit that is defined outside the loop. 102 bool isTailPredicate(Instruction *Predicate, Value *NumElements); 103 104 /// Insert the intrinsic to represent the effect of tail predication. 105 void InsertVCTPIntrinsic(Instruction *Predicate, 106 DenseMap<Instruction*, Instruction*> &NewPredicates, 107 VectorType *VecTy, 108 Value *NumElements); 109 110 /// Rematerialize the iteration count in exit blocks, which enables 111 /// ARMLowOverheadLoops to better optimise away loop update statements inside 112 /// hardware-loops. 113 void RematerializeIterCount(); 114 }; 115 116 } // end namespace 117 118 static bool IsDecrement(Instruction &I) { 119 auto *Call = dyn_cast<IntrinsicInst>(&I); 120 if (!Call) 121 return false; 122 123 Intrinsic::ID ID = Call->getIntrinsicID(); 124 return ID == Intrinsic::loop_decrement_reg; 125 } 126 127 static bool IsMasked(Instruction *I) { 128 auto *Call = dyn_cast<IntrinsicInst>(I); 129 if (!Call) 130 return false; 131 132 Intrinsic::ID ID = Call->getIntrinsicID(); 133 // TODO: Support gather/scatter expand/compress operations. 
134 return ID == Intrinsic::masked_store || ID == Intrinsic::masked_load; 135 } 136 137 void MVETailPredication::RematerializeIterCount() { 138 SmallVector<WeakTrackingVH, 16> DeadInsts; 139 SCEVExpander Rewriter(*SE, *DL, "mvetp"); 140 ReplaceExitVal ReplaceExitValue = AlwaysRepl; 141 142 formLCSSARecursively(*L, *DT, LI, SE); 143 rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT, ReplaceExitValue, 144 DeadInsts); 145 } 146 147 bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { 148 if (skipLoop(L) || DisableTailPredication) 149 return false; 150 151 MaskedInsts.clear(); 152 Function &F = *L->getHeader()->getParent(); 153 auto &TPC = getAnalysis<TargetPassConfig>(); 154 auto &TM = TPC.getTM<TargetMachine>(); 155 auto *ST = &TM.getSubtarget<ARMSubtarget>(F); 156 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 157 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 158 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 159 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 160 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 161 TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr; 162 DL = &L->getHeader()->getModule()->getDataLayout(); 163 this->L = L; 164 165 // The MVE and LOB extensions are combined to enable tail-predication, but 166 // there's nothing preventing us from generating VCTP instructions for v8.1m. 
167 if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) { 168 LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n"); 169 return false; 170 } 171 172 BasicBlock *Preheader = L->getLoopPreheader(); 173 if (!Preheader) 174 return false; 175 176 auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* { 177 for (auto &I : *BB) { 178 auto *Call = dyn_cast<IntrinsicInst>(&I); 179 if (!Call) 180 continue; 181 182 Intrinsic::ID ID = Call->getIntrinsicID(); 183 if (ID == Intrinsic::set_loop_iterations || 184 ID == Intrinsic::test_set_loop_iterations) 185 return cast<IntrinsicInst>(&I); 186 } 187 return nullptr; 188 }; 189 190 // Look for the hardware loop intrinsic that sets the iteration count. 191 IntrinsicInst *Setup = FindLoopIterations(Preheader); 192 193 // The test.set iteration could live in the pre-preheader. 194 if (!Setup) { 195 if (!Preheader->getSinglePredecessor()) 196 return false; 197 Setup = FindLoopIterations(Preheader->getSinglePredecessor()); 198 if (!Setup) 199 return false; 200 } 201 202 // Search for the hardware loop intrinic that decrements the loop counter. 
203 IntrinsicInst *Decrement = nullptr; 204 for (auto *BB : L->getBlocks()) { 205 for (auto &I : *BB) { 206 if (IsDecrement(I)) { 207 Decrement = cast<IntrinsicInst>(&I); 208 break; 209 } 210 } 211 } 212 213 if (!Decrement) 214 return false; 215 216 LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n" 217 << *Decrement << "\n"); 218 219 if (TryConvert(Setup->getArgOperand(0))) { 220 if (ClonedVCTPInExitBlock) 221 RematerializeIterCount(); 222 return true; 223 } 224 225 return false; 226 } 227 228 bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) { 229 // Look for the following: 230 231 // %trip.count.minus.1 = add i32 %N, -1 232 // %broadcast.splatinsert10 = insertelement <4 x i32> undef, 233 // i32 %trip.count.minus.1, i32 0 234 // %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, 235 // <4 x i32> undef, 236 // <4 x i32> zeroinitializer 237 // ... 238 // ... 239 // %index = phi i32 240 // %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 241 // %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, 242 // <4 x i32> undef, 243 // <4 x i32> zeroinitializer 244 // %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3> 245 // %pred = icmp ule <4 x i32> %induction, %broadcast.splat11 246 247 // And return whether V == %pred. 248 249 using namespace PatternMatch; 250 251 CmpInst::Predicate Pred; 252 Instruction *Shuffle = nullptr; 253 Instruction *Induction = nullptr; 254 255 // The vector icmp 256 if (!match(I, m_ICmp(Pred, m_Instruction(Induction), 257 m_Instruction(Shuffle))) || 258 Pred != ICmpInst::ICMP_ULE) 259 return false; 260 261 // First find the stuff outside the loop which is setting up the limit 262 // vector.... 263 // The invariant shuffle that broadcast the limit into a vector. 
264 Instruction *Insert = nullptr; 265 if (!match(Shuffle, m_ShuffleVector(m_Instruction(Insert), m_Undef(), 266 m_Zero()))) 267 return false; 268 269 // Insert the limit into a vector. 270 Instruction *BECount = nullptr; 271 if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(BECount), 272 m_Zero()))) 273 return false; 274 275 // The limit calculation, backedge count. 276 Value *TripCount = nullptr; 277 if (!match(BECount, m_Add(m_Value(TripCount), m_AllOnes()))) 278 return false; 279 280 if (TripCount != NumElements || !L->isLoopInvariant(BECount)) 281 return false; 282 283 // Now back to searching inside the loop body... 284 // Find the add with takes the index iv and adds a constant vector to it. 285 Instruction *BroadcastSplat = nullptr; 286 Constant *Const = nullptr; 287 if (!match(Induction, m_Add(m_Instruction(BroadcastSplat), 288 m_Constant(Const)))) 289 return false; 290 291 // Check that we're adding <0, 1, 2, 3... 292 if (auto *CDS = dyn_cast<ConstantDataSequential>(Const)) { 293 for (unsigned i = 0; i < CDS->getNumElements(); ++i) { 294 if (CDS->getElementAsInteger(i) != i) 295 return false; 296 } 297 } else 298 return false; 299 300 // The shuffle which broadcasts the index iv into a vector. 301 if (!match(BroadcastSplat, m_ShuffleVector(m_Instruction(Insert), m_Undef(), 302 m_Zero()))) 303 return false; 304 305 // The insert element which initialises a vector with the index iv. 306 Instruction *IV = nullptr; 307 if (!match(Insert, m_InsertElement(m_Undef(), m_Instruction(IV), m_Zero()))) 308 return false; 309 310 // The index iv. 311 auto *Phi = dyn_cast<PHINode>(IV); 312 if (!Phi) 313 return false; 314 315 // TODO: Don't think we need to check the entry value. 
316 Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader()); 317 if (!match(OnEntry, m_Zero())) 318 return false; 319 320 Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch()); 321 unsigned Lanes = cast<VectorType>(Insert->getType())->getNumElements(); 322 323 Instruction *LHS = nullptr; 324 if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes)))) 325 return false; 326 327 return LHS == Phi; 328 } 329 330 static VectorType* getVectorType(IntrinsicInst *I) { 331 unsigned TypeOp = I->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1; 332 auto *PtrTy = cast<PointerType>(I->getOperand(TypeOp)->getType()); 333 return cast<VectorType>(PtrTy->getElementType()); 334 } 335 336 bool MVETailPredication::IsPredicatedVectorLoop() { 337 // Check that the loop contains at least one masked load/store intrinsic. 338 // We only support 'normal' vector instructions - other than masked 339 // load/stores. 340 for (auto *BB : L->getBlocks()) { 341 for (auto &I : *BB) { 342 if (IsMasked(&I)) { 343 VectorType *VecTy = getVectorType(cast<IntrinsicInst>(&I)); 344 unsigned Lanes = VecTy->getNumElements(); 345 unsigned ElementWidth = VecTy->getScalarSizeInBits(); 346 // MVE vectors are 128-bit, but don't support 128 x i1. 347 // TODO: Can we support vectors larger than 128-bits? 
348 unsigned MaxWidth = TTI->getRegisterBitWidth(true); 349 if (Lanes * ElementWidth > MaxWidth || Lanes == MaxWidth) 350 return false; 351 MaskedInsts.push_back(cast<IntrinsicInst>(&I)); 352 } else if (auto *Int = dyn_cast<IntrinsicInst>(&I)) { 353 for (auto &U : Int->args()) { 354 if (isa<VectorType>(U->getType())) 355 return false; 356 } 357 } 358 } 359 } 360 361 return !MaskedInsts.empty(); 362 } 363 364 Value* MVETailPredication::ComputeElements(Value *TripCount, 365 VectorType *VecTy) { 366 const SCEV *TripCountSE = SE->getSCEV(TripCount); 367 ConstantInt *VF = ConstantInt::get(cast<IntegerType>(TripCount->getType()), 368 VecTy->getNumElements()); 369 370 if (VF->equalsInt(1)) 371 return nullptr; 372 373 // TODO: Support constant trip counts. 374 auto VisitAdd = [&](const SCEVAddExpr *S) -> const SCEVMulExpr* { 375 if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) { 376 if (Const->getAPInt() != -VF->getValue()) 377 return nullptr; 378 } else 379 return nullptr; 380 return dyn_cast<SCEVMulExpr>(S->getOperand(1)); 381 }; 382 383 auto VisitMul = [&](const SCEVMulExpr *S) -> const SCEVUDivExpr* { 384 if (auto *Const = dyn_cast<SCEVConstant>(S->getOperand(0))) { 385 if (Const->getValue() != VF) 386 return nullptr; 387 } else 388 return nullptr; 389 return dyn_cast<SCEVUDivExpr>(S->getOperand(1)); 390 }; 391 392 auto VisitDiv = [&](const SCEVUDivExpr *S) -> const SCEV* { 393 if (auto *Const = dyn_cast<SCEVConstant>(S->getRHS())) { 394 if (Const->getValue() != VF) 395 return nullptr; 396 } else 397 return nullptr; 398 399 if (auto *RoundUp = dyn_cast<SCEVAddExpr>(S->getLHS())) { 400 if (auto *Const = dyn_cast<SCEVConstant>(RoundUp->getOperand(0))) { 401 if (Const->getAPInt() != (VF->getValue() - 1)) 402 return nullptr; 403 } else 404 return nullptr; 405 406 return RoundUp->getOperand(1); 407 } 408 return nullptr; 409 }; 410 411 // TODO: Can we use SCEV helpers, such as findArrayDimensions, and friends to 412 // determine the numbers of elements instead? 
Looks like this is what is used 413 // for delinearization, but I'm not sure if it can be applied to the 414 // vectorized form - at least not without a bit more work than I feel 415 // comfortable with. 416 417 // Search for Elems in the following SCEV: 418 // (1 + ((-VF + (VF * (((VF - 1) + %Elems) /u VF))<nuw>) /u VF))<nuw><nsw> 419 const SCEV *Elems = nullptr; 420 if (auto *TC = dyn_cast<SCEVAddExpr>(TripCountSE)) 421 if (auto *Div = dyn_cast<SCEVUDivExpr>(TC->getOperand(1))) 422 if (auto *Add = dyn_cast<SCEVAddExpr>(Div->getLHS())) 423 if (auto *Mul = VisitAdd(Add)) 424 if (auto *Div = VisitMul(Mul)) 425 if (auto *Res = VisitDiv(Div)) 426 Elems = Res; 427 428 if (!Elems) 429 return nullptr; 430 431 Instruction *InsertPt = L->getLoopPreheader()->getTerminator(); 432 if (!isSafeToExpandAt(Elems, InsertPt, *SE)) 433 return nullptr; 434 435 auto DL = L->getHeader()->getModule()->getDataLayout(); 436 SCEVExpander Expander(*SE, DL, "elements"); 437 return Expander.expandCodeFor(Elems, Elems->getType(), InsertPt); 438 } 439 440 // Look through the exit block to see whether there's a duplicate predicate 441 // instruction. This can happen when we need to perform a select on values 442 // from the last and previous iteration. Instead of doing a straight 443 // replacement of that predicate with the vctp, clone the vctp and place it 444 // in the block. This means that the VPR doesn't have to be live into the 445 // exit block which should make it easier to convert this loop into a proper 446 // tail predicated loop. 
447 static bool Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates, 448 SetVector<Instruction*> &MaybeDead, Loop *L) { 449 BasicBlock *Exit = L->getUniqueExitBlock(); 450 if (!Exit) { 451 LLVM_DEBUG(dbgs() << "ARM TP: can't find loop exit block\n"); 452 return false; 453 } 454 455 bool ClonedVCTPInExitBlock = false; 456 457 for (auto &Pair : NewPredicates) { 458 Instruction *OldPred = Pair.first; 459 Instruction *NewPred = Pair.second; 460 461 for (auto &I : *Exit) { 462 if (I.isSameOperationAs(OldPred)) { 463 Instruction *PredClone = NewPred->clone(); 464 PredClone->insertBefore(&I); 465 I.replaceAllUsesWith(PredClone); 466 MaybeDead.insert(&I); 467 ClonedVCTPInExitBlock = true; 468 LLVM_DEBUG(dbgs() << "ARM TP: replacing: "; I.dump(); 469 dbgs() << "ARM TP: with: "; PredClone->dump()); 470 break; 471 } 472 } 473 } 474 475 // Drop references and add operands to check for dead. 476 SmallPtrSet<Instruction*, 4> Dead; 477 while (!MaybeDead.empty()) { 478 auto *I = MaybeDead.front(); 479 MaybeDead.remove(I); 480 if (I->hasNUsesOrMore(1)) 481 continue; 482 483 for (auto &U : I->operands()) { 484 if (auto *OpI = dyn_cast<Instruction>(U)) 485 MaybeDead.insert(OpI); 486 } 487 I->dropAllReferences(); 488 Dead.insert(I); 489 } 490 491 for (auto *I : Dead) { 492 LLVM_DEBUG(dbgs() << "ARM TP: removing dead insn: "; I->dump()); 493 I->eraseFromParent(); 494 } 495 496 for (auto I : L->blocks()) 497 DeleteDeadPHIs(I); 498 499 return ClonedVCTPInExitBlock; 500 } 501 502 void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate, 503 DenseMap<Instruction*, Instruction*> &NewPredicates, 504 VectorType *VecTy, Value *NumElements) { 505 IRBuilder<> Builder(L->getHeader()->getFirstNonPHI()); 506 Module *M = L->getHeader()->getModule(); 507 Type *Ty = IntegerType::get(M->getContext(), 32); 508 509 // Insert a phi to count the number of elements processed by the loop. 
510 PHINode *Processed = Builder.CreatePHI(Ty, 2); 511 Processed->addIncoming(NumElements, L->getLoopPreheader()); 512 513 // Insert the intrinsic to represent the effect of tail predication. 514 Builder.SetInsertPoint(cast<Instruction>(Predicate)); 515 ConstantInt *Factor = 516 ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements()); 517 518 Intrinsic::ID VCTPID; 519 switch (VecTy->getNumElements()) { 520 default: 521 llvm_unreachable("unexpected number of lanes"); 522 case 4: VCTPID = Intrinsic::arm_mve_vctp32; break; 523 case 8: VCTPID = Intrinsic::arm_mve_vctp16; break; 524 case 16: VCTPID = Intrinsic::arm_mve_vctp8; break; 525 526 // FIXME: vctp64 currently not supported because the predicate 527 // vector wants to be <2 x i1>, but v2i1 is not a legal MVE 528 // type, so problems happen at isel time. 529 // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics 530 // purposes, but takes a v4i1 instead of a v2i1. 531 } 532 Function *VCTP = Intrinsic::getDeclaration(M, VCTPID); 533 Value *TailPredicate = Builder.CreateCall(VCTP, Processed); 534 Predicate->replaceAllUsesWith(TailPredicate); 535 NewPredicates[Predicate] = cast<Instruction>(TailPredicate); 536 537 // Add the incoming value to the new phi. 538 // TODO: This add likely already exists in the loop. 539 Value *Remaining = Builder.CreateSub(Processed, Factor); 540 Processed->addIncoming(Remaining, L->getLoopLatch()); 541 LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: " 542 << *Processed << "\n" 543 << "ARM TP: Inserted VCTP: " << *TailPredicate << "\n"); 544 } 545 546 bool MVETailPredication::TryConvert(Value *TripCount) { 547 if (!IsPredicatedVectorLoop()) { 548 LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop"); 549 return false; 550 } 551 552 LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n"); 553 554 // Walk through the masked intrinsics and try to find whether the predicate 555 // operand is generated from an induction variable. 
556 SetVector<Instruction*> Predicates; 557 DenseMap<Instruction*, Instruction*> NewPredicates; 558 559 for (auto *I : MaskedInsts) { 560 Intrinsic::ID ID = I->getIntrinsicID(); 561 unsigned PredOp = ID == Intrinsic::masked_load ? 2 : 3; 562 auto *Predicate = dyn_cast<Instruction>(I->getArgOperand(PredOp)); 563 if (!Predicate || Predicates.count(Predicate)) 564 continue; 565 566 VectorType *VecTy = getVectorType(I); 567 Value *NumElements = ComputeElements(TripCount, VecTy); 568 if (!NumElements) 569 continue; 570 571 if (!isTailPredicate(Predicate, NumElements)) { 572 LLVM_DEBUG(dbgs() << "ARM TP: Not tail predicate: " << *Predicate << "\n"); 573 continue; 574 } 575 576 LLVM_DEBUG(dbgs() << "ARM TP: Found tail predicate: " << *Predicate << "\n"); 577 Predicates.insert(Predicate); 578 579 InsertVCTPIntrinsic(Predicate, NewPredicates, VecTy, NumElements); 580 } 581 582 // Now clean up. 583 ClonedVCTPInExitBlock = Cleanup(NewPredicates, Predicates, L); 584 return true; 585 } 586 587 Pass *llvm::createMVETailPredicationPass() { 588 return new MVETailPredication(); 589 } 590 591 char MVETailPredication::ID = 0; 592 593 INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false) 594 INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false) 595