1 //=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the AArch64PromoteConstant pass which promotes constants 11 // to global variables when this is likely to be more efficient. Currently only 12 // types related to constant vector (i.e., constant vector, array of constant 13 // vectors, constant structure with a constant vector field, etc.) are promoted 14 // to global variables. Constant vectors are likely to be lowered in target 15 // constant pool during instruction selection already; therefore, the access 16 // will remain the same (memory load), but the structure types are not split 17 // into different constant pool accesses for each field. A bonus side effect is 18 // that created globals may be merged by the global merge pass. 19 // 20 // FIXME: This pass may be useful for other targets too. 21 //===----------------------------------------------------------------------===// 22 23 #include "AArch64.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/Dominators.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/GlobalVariable.h" 32 #include "llvm/IR/IRBuilder.h" 33 #include "llvm/IR/InlineAsm.h" 34 #include "llvm/IR/InstIterator.h" 35 #include "llvm/IR/Instructions.h" 36 #include "llvm/IR/IntrinsicInst.h" 37 #include "llvm/IR/Module.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/raw_ostream.h" 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "aarch64-promote-const" 46 47 // Stress testing mode - disable heuristics. 48 static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, 49 cl::desc("Promote all vector constants")); 50 51 STATISTIC(NumPromoted, "Number of promoted constants"); 52 STATISTIC(NumPromotedUses, "Number of promoted constants uses"); 53 54 //===----------------------------------------------------------------------===// 55 // AArch64PromoteConstant 56 //===----------------------------------------------------------------------===// 57 58 namespace { 59 /// Promotes interesting constant into global variables. 60 /// The motivating example is: 61 /// static const uint16_t TableA[32] = { 62 /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, 63 /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, 64 /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, 65 /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, 66 /// }; 67 /// 68 /// uint8x16x4_t LoadStatic(void) { 69 /// uint8x16x4_t ret; 70 /// ret.val[0] = vld1q_u16(TableA + 0); 71 /// ret.val[1] = vld1q_u16(TableA + 8); 72 /// ret.val[2] = vld1q_u16(TableA + 16); 73 /// ret.val[3] = vld1q_u16(TableA + 24); 74 /// return ret; 75 /// } 76 /// 77 /// The constants in this example are folded into the uses. Thus, 4 different 78 /// constants are created. 79 /// 80 /// As their type is vector the cheapest way to create them is to load them 81 /// for the memory. 82 /// 83 /// Therefore the final assembly final has 4 different loads. With this pass 84 /// enabled, only one load is issued for the constants. 85 class AArch64PromoteConstant : public ModulePass { 86 87 public: 88 struct PromotedConstant { 89 bool ShouldConvert = false; 90 GlobalVariable *GV = nullptr; 91 }; 92 typedef SmallDenseMap<Constant *, PromotedConstant, 16> PromotionCacheTy; 93 94 struct UpdateRecord { 95 Constant *C; 96 Instruction *User; 97 unsigned Op; 98 99 UpdateRecord(Constant *C, Instruction *User, unsigned Op) 100 : C(C), User(User), Op(Op) {} 101 }; 102 103 static char ID; 104 AArch64PromoteConstant() : ModulePass(ID) { 105 initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry()); 106 } 107 108 StringRef getPassName() const override { return "AArch64 Promote Constant"; } 109 110 /// Iterate over the functions and promote the interesting constants into 111 /// global variables with module scope. 112 bool runOnModule(Module &M) override { 113 DEBUG(dbgs() << getPassName() << '\n'); 114 if (skipModule(M)) 115 return false; 116 bool Changed = false; 117 PromotionCacheTy PromotionCache; 118 for (auto &MF : M) { 119 Changed |= runOnFunction(MF, PromotionCache); 120 } 121 return Changed; 122 } 123 124 private: 125 /// Look for interesting constants used within the given function. 126 /// Promote them into global variables, load these global variables within 127 /// the related function, so that the number of inserted load is minimal. 128 bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); 129 130 // This transformation requires dominator info 131 void getAnalysisUsage(AnalysisUsage &AU) const override { 132 AU.setPreservesCFG(); 133 AU.addRequired<DominatorTreeWrapperPass>(); 134 AU.addPreserved<DominatorTreeWrapperPass>(); 135 } 136 137 /// Type to store a list of Uses. 138 typedef SmallVector<std::pair<Instruction *, unsigned>, 4> Uses; 139 /// Map an insertion point to all the uses it dominates. 140 typedef DenseMap<Instruction *, Uses> InsertionPoints; 141 142 /// Find the closest point that dominates the given Use. 143 Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); 144 145 /// Check if the given insertion point is dominated by an existing 146 /// insertion point. 147 /// If true, the given use is added to the list of dominated uses for 148 /// the related existing point. 149 /// \param NewPt the insertion point to be checked 150 /// \param User the user of the constant 151 /// \param OpNo the operand number of the use 152 /// \param InsertPts existing insertion points 153 /// \pre NewPt and all instruction in InsertPts belong to the same function 154 /// \return true if one of the insertion point in InsertPts dominates NewPt, 155 /// false otherwise 156 bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, 157 InsertionPoints &InsertPts); 158 159 /// Check if the given insertion point can be merged with an existing 160 /// insertion point in a common dominator. 161 /// If true, the given use is added to the list of the created insertion 162 /// point. 163 /// \param NewPt the insertion point to be checked 164 /// \param User the user of the constant 165 /// \param OpNo the operand number of the use 166 /// \param InsertPts existing insertion points 167 /// \pre NewPt and all instruction in InsertPts belong to the same function 168 /// \pre isDominated returns false for the exact same parameters. 169 /// \return true if it exists an insertion point in InsertPts that could 170 /// have been merged with NewPt in a common dominator, 171 /// false otherwise 172 bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, 173 InsertionPoints &InsertPts); 174 175 /// Compute the minimal insertion points to dominates all the interesting 176 /// uses of value. 177 /// Insertion points are group per function and each insertion point 178 /// contains a list of all the uses it dominates within the related function 179 /// \param User the user of the constant 180 /// \param OpNo the operand number of the constant 181 /// \param[out] InsertPts output storage of the analysis 182 void computeInsertionPoint(Instruction *User, unsigned OpNo, 183 InsertionPoints &InsertPts); 184 185 /// Insert a definition of a new global variable at each point contained in 186 /// InsPtsPerFunc and update the related uses (also contained in 187 /// InsPtsPerFunc). 188 void insertDefinitions(Function &F, GlobalVariable &GV, 189 InsertionPoints &InsertPts); 190 191 /// Do the constant promotion indicated by the Updates records, keeping track 192 /// of globals in PromotionCache. 193 void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, 194 PromotionCacheTy &PromotionCache); 195 196 /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. 197 /// Append Use to this list and delete the entry of IPI in InsertPts. 198 static void appendAndTransferDominatedUses(Instruction *NewPt, 199 Instruction *User, unsigned OpNo, 200 InsertionPoints::iterator &IPI, 201 InsertionPoints &InsertPts) { 202 // Record the dominated use. 203 IPI->second.emplace_back(User, OpNo); 204 // Transfer the dominated uses of IPI to NewPt 205 // Inserting into the DenseMap may invalidate existing iterator. 206 // Keep a copy of the key to find the iterator to erase. Keep a copy of the 207 // value so that we don't have to dereference IPI->second. 208 Instruction *OldInstr = IPI->first; 209 Uses OldUses = std::move(IPI->second); 210 InsertPts[NewPt] = std::move(OldUses); 211 // Erase IPI. 212 InsertPts.erase(OldInstr); 213 } 214 }; 215 } // end anonymous namespace 216 217 char AArch64PromoteConstant::ID = 0; 218 219 INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", 220 "AArch64 Promote Constant Pass", false, false) 221 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 222 INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", 223 "AArch64 Promote Constant Pass", false, false) 224 225 ModulePass *llvm::createAArch64PromoteConstantPass() { 226 return new AArch64PromoteConstant(); 227 } 228 229 /// Check if the given type uses a vector type. 230 static bool isConstantUsingVectorTy(const Type *CstTy) { 231 if (CstTy->isVectorTy()) 232 return true; 233 if (CstTy->isStructTy()) { 234 for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); 235 EltIdx < EndEltIdx; ++EltIdx) 236 if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) 237 return true; 238 } else if (CstTy->isArrayTy()) 239 return isConstantUsingVectorTy(CstTy->getArrayElementType()); 240 return false; 241 } 242 243 /// Check if the given use (Instruction + OpIdx) of Cst should be converted into 244 /// a load of a global variable initialized with Cst. 245 /// A use should be converted if it is legal to do so. 246 /// For instance, it is not legal to turn the mask operand of a shuffle vector 247 /// into a load of a global variable. 248 static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, 249 unsigned OpIdx) { 250 // shufflevector instruction expects a const for the mask argument, i.e., the 251 // third argument. Do not promote this use in that case. 252 if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) 253 return false; 254 255 // extractvalue instruction expects a const idx. 256 if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) 257 return false; 258 259 // extractvalue instruction expects a const idx. 260 if (isa<const InsertValueInst>(Instr) && OpIdx > 1) 261 return false; 262 263 if (isa<const AllocaInst>(Instr) && OpIdx > 0) 264 return false; 265 266 // Alignment argument must be constant. 267 if (isa<const LoadInst>(Instr) && OpIdx > 0) 268 return false; 269 270 // Alignment argument must be constant. 271 if (isa<const StoreInst>(Instr) && OpIdx > 1) 272 return false; 273 274 // Index must be constant. 275 if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) 276 return false; 277 278 // Personality function and filters must be constant. 279 // Give up on that instruction. 280 if (isa<const LandingPadInst>(Instr)) 281 return false; 282 283 // Switch instruction expects constants to compare to. 284 if (isa<const SwitchInst>(Instr)) 285 return false; 286 287 // Expected address must be a constant. 288 if (isa<const IndirectBrInst>(Instr)) 289 return false; 290 291 // Do not mess with intrinsics. 292 if (isa<const IntrinsicInst>(Instr)) 293 return false; 294 295 // Do not mess with inline asm. 296 const CallInst *CI = dyn_cast<const CallInst>(Instr); 297 return !(CI && isa<const InlineAsm>(CI->getCalledValue())); 298 } 299 300 /// Check if the given Cst should be converted into 301 /// a load of a global variable initialized with Cst. 302 /// A constant should be converted if it is likely that the materialization of 303 /// the constant will be tricky. Thus, we give up on zero or undef values. 304 /// 305 /// \todo Currently, accept only vector related types. 306 /// Also we give up on all simple vector type to keep the existing 307 /// behavior. Otherwise, we should push here all the check of the lowering of 308 /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging 309 /// constant via global merge and the fact that the same constant is stored 310 /// only once with this method (versus, as many function that uses the constant 311 /// for the regular approach, even for float). 312 /// Again, the simplest solution would be to promote every 313 /// constant and rematerialize them when they are actually cheap to create. 314 static bool shouldConvertImpl(const Constant *Cst) { 315 if (isa<const UndefValue>(Cst)) 316 return false; 317 318 // FIXME: In some cases, it may be interesting to promote in memory 319 // a zero initialized constant. 320 // E.g., when the type of Cst require more instructions than the 321 // adrp/add/load sequence or when this sequence can be shared by several 322 // instances of Cst. 323 // Ideally, we could promote this into a global and rematerialize the constant 324 // when it was a bad idea. 325 if (Cst->isZeroValue()) 326 return false; 327 328 if (Stress) 329 return true; 330 331 // FIXME: see function \todo 332 if (Cst->getType()->isVectorTy()) 333 return false; 334 return isConstantUsingVectorTy(Cst->getType()); 335 } 336 337 static bool 338 shouldConvert(Constant &C, 339 AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { 340 auto Converted = PromotionCache.insert( 341 std::make_pair(&C, AArch64PromoteConstant::PromotedConstant())); 342 if (Converted.second) 343 Converted.first->second.ShouldConvert = shouldConvertImpl(&C); 344 return Converted.first->second.ShouldConvert; 345 } 346 347 Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, 348 unsigned OpNo) { 349 // If this user is a phi, the insertion point is in the related 350 // incoming basic block. 351 if (PHINode *PhiInst = dyn_cast<PHINode>(&User)) 352 return PhiInst->getIncomingBlock(OpNo)->getTerminator(); 353 354 return &User; 355 } 356 357 bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, 358 unsigned OpNo, 359 InsertionPoints &InsertPts) { 360 361 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 362 *NewPt->getParent()->getParent()).getDomTree(); 363 364 // Traverse all the existing insertion points and check if one is dominating 365 // NewPt. If it is, remember that. 366 for (auto &IPI : InsertPts) { 367 if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || 368 // When IPI.first is a terminator instruction, DT may think that 369 // the result is defined on the edge. 370 // Here we are testing the insertion point, not the definition. 371 (IPI.first->getParent() != NewPt->getParent() && 372 DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { 373 // No need to insert this point. Just record the dominated use. 374 DEBUG(dbgs() << "Insertion point dominated by:\n"); 375 DEBUG(IPI.first->print(dbgs())); 376 DEBUG(dbgs() << '\n'); 377 IPI.second.emplace_back(User, OpNo); 378 return true; 379 } 380 } 381 return false; 382 } 383 384 bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, 385 unsigned OpNo, 386 InsertionPoints &InsertPts) { 387 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 388 *NewPt->getParent()->getParent()).getDomTree(); 389 BasicBlock *NewBB = NewPt->getParent(); 390 391 // Traverse all the existing insertion point and check if one is dominated by 392 // NewPt and thus useless or can be combined with NewPt into a common 393 // dominator. 394 for (InsertionPoints::iterator IPI = InsertPts.begin(), 395 EndIPI = InsertPts.end(); 396 IPI != EndIPI; ++IPI) { 397 BasicBlock *CurBB = IPI->first->getParent(); 398 if (NewBB == CurBB) { 399 // Instructions are in the same block. 400 // By construction, NewPt is dominating the other. 401 // Indeed, isDominated returned false with the exact same arguments. 402 DEBUG(dbgs() << "Merge insertion point with:\n"); 403 DEBUG(IPI->first->print(dbgs())); 404 DEBUG(dbgs() << "\nat considered insertion point.\n"); 405 appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 406 return true; 407 } 408 409 // Look for a common dominator 410 BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); 411 // If none exists, we cannot merge these two points. 412 if (!CommonDominator) 413 continue; 414 415 if (CommonDominator != NewBB) { 416 // By construction, the CommonDominator cannot be CurBB. 417 assert(CommonDominator != CurBB && 418 "Instruction has not been rejected during isDominated check!"); 419 // Take the last instruction of the CommonDominator as insertion point 420 NewPt = CommonDominator->getTerminator(); 421 } 422 // else, CommonDominator is the block of NewBB, hence NewBB is the last 423 // possible insertion point in that block. 424 DEBUG(dbgs() << "Merge insertion point with:\n"); 425 DEBUG(IPI->first->print(dbgs())); 426 DEBUG(dbgs() << '\n'); 427 DEBUG(NewPt->print(dbgs())); 428 DEBUG(dbgs() << '\n'); 429 appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 430 return true; 431 } 432 return false; 433 } 434 435 void AArch64PromoteConstant::computeInsertionPoint( 436 Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { 437 DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n"); 438 DEBUG(User->print(dbgs())); 439 DEBUG(dbgs() << '\n'); 440 441 Instruction *InsertionPoint = findInsertionPoint(*User, OpNo); 442 443 DEBUG(dbgs() << "Considered insertion point:\n"); 444 DEBUG(InsertionPoint->print(dbgs())); 445 DEBUG(dbgs() << '\n'); 446 447 if (isDominated(InsertionPoint, User, OpNo, InsertPts)) 448 return; 449 // This insertion point is useful, check if we can merge some insertion 450 // point in a common dominator or if NewPt dominates an existing one. 451 if (tryAndMerge(InsertionPoint, User, OpNo, InsertPts)) 452 return; 453 454 DEBUG(dbgs() << "Keep considered insertion point\n"); 455 456 // It is definitely useful by its own 457 InsertPts[InsertionPoint].emplace_back(User, OpNo); 458 } 459 460 static void ensurePromotedGV(Function &F, Constant &C, 461 AArch64PromoteConstant::PromotedConstant &PC) { 462 assert(PC.ShouldConvert && 463 "Expected that we should convert this to a global"); 464 if (PC.GV) 465 return; 466 PC.GV = new GlobalVariable( 467 *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, 468 "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); 469 PC.GV->setInitializer(&C); 470 DEBUG(dbgs() << "Global replacement: "); 471 DEBUG(PC.GV->print(dbgs())); 472 DEBUG(dbgs() << '\n'); 473 ++NumPromoted; 474 } 475 476 void AArch64PromoteConstant::insertDefinitions(Function &F, 477 GlobalVariable &PromotedGV, 478 InsertionPoints &InsertPts) { 479 #ifndef NDEBUG 480 // Do more checking for debug purposes. 481 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 482 #endif 483 assert(!InsertPts.empty() && "Empty uses does not need a definition"); 484 485 for (const auto &IPI : InsertPts) { 486 // Create the load of the global variable. 487 IRBuilder<> Builder(IPI.first); 488 LoadInst *LoadedCst = Builder.CreateLoad(&PromotedGV); 489 DEBUG(dbgs() << "**********\n"); 490 DEBUG(dbgs() << "New def: "); 491 DEBUG(LoadedCst->print(dbgs())); 492 DEBUG(dbgs() << '\n'); 493 494 // Update the dominated uses. 495 for (auto Use : IPI.second) { 496 #ifndef NDEBUG 497 assert(DT.dominates(LoadedCst, 498 findInsertionPoint(*Use.first, Use.second)) && 499 "Inserted definition does not dominate all its uses!"); 500 #endif 501 DEBUG({ 502 dbgs() << "Use to update " << Use.second << ":"; 503 Use.first->print(dbgs()); 504 dbgs() << '\n'; 505 }); 506 Use.first->setOperand(Use.second, LoadedCst); 507 ++NumPromotedUses; 508 } 509 } 510 } 511 512 void AArch64PromoteConstant::promoteConstants( 513 Function &F, SmallVectorImpl<UpdateRecord> &Updates, 514 PromotionCacheTy &PromotionCache) { 515 // Promote the constants. 516 for (auto U = Updates.begin(), E = Updates.end(); U != E;) { 517 DEBUG(dbgs() << "** Compute insertion points **\n"); 518 auto First = U; 519 Constant *C = First->C; 520 InsertionPoints InsertPts; 521 do { 522 computeInsertionPoint(U->User, U->Op, InsertPts); 523 } while (++U != E && U->C == C); 524 525 auto &Promotion = PromotionCache[C]; 526 ensurePromotedGV(F, *C, Promotion); 527 insertDefinitions(F, *Promotion.GV, InsertPts); 528 } 529 } 530 531 bool AArch64PromoteConstant::runOnFunction(Function &F, 532 PromotionCacheTy &PromotionCache) { 533 // Look for instructions using constant vector. Promote that constant to a 534 // global variable. Create as few loads of this variable as possible and 535 // update the uses accordingly. 536 SmallVector<UpdateRecord, 64> Updates; 537 for (Instruction &I : instructions(&F)) { 538 // Traverse the operand, looking for constant vectors. Replace them by a 539 // load of a global variable of constant vector type. 540 for (Use &U : I.operands()) { 541 Constant *Cst = dyn_cast<Constant>(U); 542 // There is no point in promoting global values as they are already 543 // global. Do not promote constant expressions either, as they may 544 // require some code expansion. 545 if (!Cst || isa<GlobalValue>(Cst) || isa<ConstantExpr>(Cst)) 546 continue; 547 548 // Check if this constant is worth promoting. 549 if (!shouldConvert(*Cst, PromotionCache)) 550 continue; 551 552 // Check if this use should be promoted. 553 unsigned OpNo = &U - I.op_begin(); 554 if (!shouldConvertUse(Cst, &I, OpNo)) 555 continue; 556 557 Updates.emplace_back(Cst, &I, OpNo); 558 } 559 } 560 561 if (Updates.empty()) 562 return false; 563 564 promoteConstants(F, Updates, PromotionCache); 565 return true; 566 } 567