1 //===----- TypePromotion.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This is an opcode based type promotion pass for small types that would 11 /// otherwise be promoted during legalisation. This works around the limitations 12 /// of selection dag for cyclic regions. The search begins from icmp 13 /// instructions operands where a tree, consisting of non-wrapping or safe 14 /// wrapping instructions, is built, checked and promoted if possible. 15 /// 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/ADT/SetVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/CodeGen/Passes.h" 22 #include "llvm/CodeGen/TargetLowering.h" 23 #include "llvm/CodeGen/TargetPassConfig.h" 24 #include "llvm/CodeGen/TargetSubtargetInfo.h" 25 #include "llvm/IR/Attributes.h" 26 #include "llvm/IR/BasicBlock.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/DataLayout.h" 30 #include "llvm/IR/InstrTypes.h" 31 #include "llvm/IR/Instruction.h" 32 #include "llvm/IR/Instructions.h" 33 #include "llvm/IR/IntrinsicInst.h" 34 #include "llvm/IR/Intrinsics.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/IR/Verifier.h" 38 #include "llvm/InitializePasses.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 43 #define DEBUG_TYPE "type-promotion" 44 #define PASS_NAME "Type Promotion" 45 46 using namespace llvm; 47 48 static cl::opt<bool> 49 DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(true), 50 cl::desc("Disable type promotion pass")); 51 52 // The goal of this pass is to enable more efficient code generation for 53 // operations on narrow types (i.e. types with < 32-bits) and this is a 54 // motivating IR code example: 55 // 56 // define hidden i32 @cmp(i8 zeroext) { 57 // %2 = add i8 %0, -49 58 // %3 = icmp ult i8 %2, 3 59 // .. 60 // } 61 // 62 // The issue here is that i8 is type-legalized to i32 because i8 is not a 63 // legal type. Thus, arithmetic is done in integer-precision, but then the 64 // byte value is masked out as follows: 65 // 66 // t19: i32 = add t4, Constant:i32<-49> 67 // t24: i32 = and t19, Constant:i32<255> 68 // 69 // Consequently, we generate code like this: 70 // 71 // subs r0, #49 72 // uxtb r1, r0 73 // cmp r1, #3 74 // 75 // This shows that masking out the byte value results in generation of 76 // the UXTB instruction. This is not optimal as r0 already contains the byte 77 // value we need, and so instead we can just generate: 78 // 79 // sub.w r1, r0, #49 80 // cmp r1, #3 81 // 82 // We achieve this by type promoting the IR to i32 like so for this example: 83 // 84 // define i32 @cmp(i8 zeroext %c) { 85 // %0 = zext i8 %c to i32 86 // %c.off = add i32 %0, -49 87 // %1 = icmp ult i32 %c.off, 3 88 // .. 89 // } 90 // 91 // For this to be valid and legal, we need to prove that the i32 add is 92 // producing the same value as the i8 addition, and that e.g. no overflow 93 // happens. 94 // 95 // A brief sketch of the algorithm and some terminology. 96 // We pattern match interesting IR patterns: 97 // - which have "sources": instructions producing narrow values (i8, i16), and 98 // - they have "sinks": instructions consuming these narrow values. 99 // 100 // We collect all instruction connecting sources and sinks in a worklist, so 101 // that we can mutate these instruction and perform type promotion when it is 102 // legal to do so. 103 104 namespace { 105 class IRPromoter { 106 SmallPtrSet<Value*, 8> NewInsts; 107 SmallPtrSet<Instruction*, 4> InstsToRemove; 108 DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap; 109 SmallPtrSet<Value*, 8> Promoted; 110 LLVMContext &Ctx; 111 // The type we promote to: always i32 112 IntegerType *ExtTy = nullptr; 113 // The type of the value that the search began from, either i8 or i16. 114 // This defines the max range of the values that we allow in the promoted 115 // tree. 116 IntegerType *OrigTy = nullptr; 117 SetVector<Value*> *Visited; 118 SmallPtrSetImpl<Value*> *Sources; 119 SmallPtrSetImpl<Instruction*> *Sinks; 120 SmallPtrSetImpl<Instruction*> *SafeToPromote; 121 SmallPtrSetImpl<Instruction*> *SafeWrap; 122 123 void ReplaceAllUsersOfWith(Value *From, Value *To); 124 void PrepareWrappingAdds(void); 125 void ExtendSources(void); 126 void ConvertTruncs(void); 127 void PromoteTree(void); 128 void TruncateSinks(void); 129 void Cleanup(void); 130 131 public: 132 IRPromoter(Module *M) : Ctx(M->getContext()) { } 133 134 135 void Mutate(Type *OrigTy, unsigned PromotedWidth, 136 SetVector<Value*> &Visited, 137 SmallPtrSetImpl<Value*> &Sources, 138 SmallPtrSetImpl<Instruction*> &Sinks, 139 SmallPtrSetImpl<Instruction*> &SafeToPromote, 140 SmallPtrSetImpl<Instruction*> &SafeWrap); 141 }; 142 143 class TypePromotion : public FunctionPass { 144 IRPromoter *Promoter = nullptr; 145 SmallPtrSet<Value*, 16> AllVisited; 146 SmallPtrSet<Instruction*, 8> SafeToPromote; 147 SmallPtrSet<Instruction*, 4> SafeWrap; 148 149 bool isSafeWrap(Instruction *I); 150 bool isSupportedValue(Value *V); 151 bool isLegalToPromote(Value *V); 152 bool TryToPromote(Value *V, unsigned PromotedWidth); 153 154 public: 155 static char ID; 156 static unsigned TypeSize; 157 Type *OrigTy = nullptr; 158 159 TypePromotion() : FunctionPass(ID) {} 160 161 void getAnalysisUsage(AnalysisUsage &AU) const override { 162 AU.addRequired<TargetTransformInfoWrapperPass>(); 163 AU.addRequired<TargetPassConfig>(); 164 } 165 166 StringRef getPassName() const override { return PASS_NAME; } 167 168 bool doInitialization(Module &M) override; 169 bool runOnFunction(Function &F) override; 170 bool doFinalization(Module &M) override; 171 }; 172 173 } 174 175 static bool GenerateSignBits(Value *V) { 176 if (!isa<Instruction>(V)) 177 return false; 178 179 unsigned Opc = cast<Instruction>(V)->getOpcode(); 180 return Opc == Instruction::AShr || Opc == Instruction::SDiv || 181 Opc == Instruction::SRem || Opc == Instruction::SExt; 182 } 183 184 static bool EqualTypeSize(Value *V) { 185 return V->getType()->getScalarSizeInBits() == TypePromotion::TypeSize; 186 } 187 188 static bool LessOrEqualTypeSize(Value *V) { 189 return V->getType()->getScalarSizeInBits() <= TypePromotion::TypeSize; 190 } 191 192 static bool GreaterThanTypeSize(Value *V) { 193 return V->getType()->getScalarSizeInBits() > TypePromotion::TypeSize; 194 } 195 196 static bool LessThanTypeSize(Value *V) { 197 return V->getType()->getScalarSizeInBits() < TypePromotion::TypeSize; 198 } 199 200 /// Some instructions can use 8- and 16-bit operands, and we don't need to 201 /// promote anything larger. We disallow booleans to make life easier when 202 /// dealing with icmps but allow any other integer that is <= 16 bits. Void 203 /// types are accepted so we can handle switches. 204 static bool isSupportedType(Value *V) { 205 Type *Ty = V->getType(); 206 207 // Allow voids and pointers, these won't be promoted. 208 if (Ty->isVoidTy() || Ty->isPointerTy()) 209 return true; 210 211 if (auto *Ld = dyn_cast<LoadInst>(V)) 212 Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType(); 213 214 if (!isa<IntegerType>(Ty) || 215 cast<IntegerType>(V->getType())->getBitWidth() == 1) 216 return false; 217 218 return LessOrEqualTypeSize(V); 219 } 220 221 /// Return true if the given value is a source in the use-def chain, producing 222 /// a narrow 'TypeSize' value. These values will be zext to start the promotion 223 /// of the tree to i32. We guarantee that these won't populate the upper bits 224 /// of the register. ZExt on the loads will be free, and the same for call 225 /// return values because we only accept ones that guarantee a zeroext ret val. 226 /// Many arguments will have the zeroext attribute too, so those would be free 227 /// too. 228 static bool isSource(Value *V) { 229 if (!isa<IntegerType>(V->getType())) 230 return false; 231 232 // TODO Allow zext to be sources. 233 if (isa<Argument>(V)) 234 return true; 235 else if (isa<LoadInst>(V)) 236 return true; 237 else if (isa<BitCastInst>(V)) 238 return true; 239 else if (auto *Call = dyn_cast<CallInst>(V)) 240 return Call->hasRetAttr(Attribute::AttrKind::ZExt); 241 else if (auto *Trunc = dyn_cast<TruncInst>(V)) 242 return EqualTypeSize(Trunc); 243 return false; 244 } 245 246 /// Return true if V will require any promoted values to be truncated for the 247 /// the IR to remain valid. We can't mutate the value type of these 248 /// instructions. 249 static bool isSink(Value *V) { 250 // TODO The truncate also isn't actually necessary because we would already 251 // proved that the data value is kept within the range of the original data 252 // type. 253 254 // Sinks are: 255 // - points where the value in the register is being observed, such as an 256 // icmp, switch or store. 257 // - points where value types have to match, such as calls and returns. 258 // - zext are included to ease the transformation and are generally removed 259 // later on. 260 if (auto *Store = dyn_cast<StoreInst>(V)) 261 return LessOrEqualTypeSize(Store->getValueOperand()); 262 if (auto *Return = dyn_cast<ReturnInst>(V)) 263 return LessOrEqualTypeSize(Return->getReturnValue()); 264 if (auto *ZExt = dyn_cast<ZExtInst>(V)) 265 return GreaterThanTypeSize(ZExt); 266 if (auto *Switch = dyn_cast<SwitchInst>(V)) 267 return LessThanTypeSize(Switch->getCondition()); 268 if (auto *ICmp = dyn_cast<ICmpInst>(V)) 269 return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0)); 270 271 return isa<CallInst>(V); 272 } 273 274 /// Return whether this instruction can safely wrap. 275 bool TypePromotion::isSafeWrap(Instruction *I) { 276 // We can support a, potentially, wrapping instruction (I) if: 277 // - It is only used by an unsigned icmp. 278 // - The icmp uses a constant. 279 // - The wrapping value (I) is decreasing, i.e would underflow - wrapping 280 // around zero to become a larger number than before. 281 // - The wrapping instruction (I) also uses a constant. 282 // 283 // We can then use the two constants to calculate whether the result would 284 // wrap in respect to itself in the original bitwidth. If it doesn't wrap, 285 // just underflows the range, the icmp would give the same result whether the 286 // result has been truncated or not. We calculate this by: 287 // - Zero extending both constants, if needed, to 32-bits. 288 // - Take the absolute value of I's constant, adding this to the icmp const. 289 // - Check that this value is not out of range for small type. If it is, it 290 // means that it has underflowed enough to wrap around the icmp constant. 291 // 292 // For example: 293 // 294 // %sub = sub i8 %a, 2 295 // %cmp = icmp ule i8 %sub, 254 296 // 297 // If %a = 0, %sub = -2 == FE == 254 298 // But if this is evalulated as a i32 299 // %sub = -2 == FF FF FF FE == 4294967294 300 // So the unsigned compares (i8 and i32) would not yield the same result. 301 // 302 // Another way to look at it is: 303 // %a - 2 <= 254 304 // %a + 2 <= 254 + 2 305 // %a <= 256 306 // And we can't represent 256 in the i8 format, so we don't support it. 307 // 308 // Whereas: 309 // 310 // %sub i8 %a, 1 311 // %cmp = icmp ule i8 %sub, 254 312 // 313 // If %a = 0, %sub = -1 == FF == 255 314 // As i32: 315 // %sub = -1 == FF FF FF FF == 4294967295 316 // 317 // In this case, the unsigned compare results would be the same and this 318 // would also be true for ult, uge and ugt: 319 // - (255 < 254) == (0xFFFFFFFF < 254) == false 320 // - (255 <= 254) == (0xFFFFFFFF <= 254) == false 321 // - (255 > 254) == (0xFFFFFFFF > 254) == true 322 // - (255 >= 254) == (0xFFFFFFFF >= 254) == true 323 // 324 // To demonstrate why we can't handle increasing values: 325 // 326 // %add = add i8 %a, 2 327 // %cmp = icmp ult i8 %add, 127 328 // 329 // If %a = 254, %add = 256 == (i8 1) 330 // As i32: 331 // %add = 256 332 // 333 // (1 < 127) != (256 < 127) 334 335 unsigned Opc = I->getOpcode(); 336 if (Opc != Instruction::Add && Opc != Instruction::Sub) 337 return false; 338 339 if (!I->hasOneUse() || 340 !isa<ICmpInst>(*I->user_begin()) || 341 !isa<ConstantInt>(I->getOperand(1))) 342 return false; 343 344 ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1)); 345 bool NegImm = OverflowConst->isNegative(); 346 bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || 347 ((Opc == Instruction::Add) && NegImm); 348 if (!IsDecreasing) 349 return false; 350 351 // Don't support an icmp that deals with sign bits. 352 auto *CI = cast<ICmpInst>(*I->user_begin()); 353 if (CI->isSigned() || CI->isEquality()) 354 return false; 355 356 ConstantInt *ICmpConst = nullptr; 357 if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0))) 358 ICmpConst = Const; 359 else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1))) 360 ICmpConst = Const; 361 else 362 return false; 363 364 // Now check that the result can't wrap on itself. 365 APInt Total = ICmpConst->getValue().getBitWidth() < 32 ? 366 ICmpConst->getValue().zext(32) : ICmpConst->getValue(); 367 368 Total += OverflowConst->getValue().getBitWidth() < 32 ? 369 OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); 370 371 APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); 372 373 if (Total.getBitWidth() > Max.getBitWidth()) { 374 if (Total.ugt(Max.zext(Total.getBitWidth()))) 375 return false; 376 } else if (Max.getBitWidth() > Total.getBitWidth()) { 377 if (Total.zext(Max.getBitWidth()).ugt(Max)) 378 return false; 379 } else if (Total.ugt(Max)) 380 return false; 381 382 LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " << *I << "\n"); 383 SafeWrap.insert(I); 384 return true; 385 } 386 387 static bool shouldPromote(Value *V) { 388 if (!isa<IntegerType>(V->getType()) || isSink(V)) 389 return false; 390 391 if (isSource(V)) 392 return true; 393 394 auto *I = dyn_cast<Instruction>(V); 395 if (!I) 396 return false; 397 398 if (isa<ICmpInst>(I)) 399 return false; 400 401 return true; 402 } 403 404 /// Return whether we can safely mutate V's type to ExtTy without having to be 405 /// concerned with zero extending or truncation. 406 static bool isPromotedResultSafe(Value *V) { 407 if (GenerateSignBits(V)) 408 return false; 409 410 if (!isa<Instruction>(V)) 411 return true; 412 413 if (!isa<OverflowingBinaryOperator>(V)) 414 return true; 415 416 return cast<Instruction>(V)->hasNoUnsignedWrap(); 417 } 418 419 void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { 420 SmallVector<Instruction*, 4> Users; 421 Instruction *InstTo = dyn_cast<Instruction>(To); 422 bool ReplacedAll = true; 423 424 LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To 425 << "\n"); 426 427 for (Use &U : From->uses()) { 428 auto *User = cast<Instruction>(U.getUser()); 429 if (InstTo && User->isIdenticalTo(InstTo)) { 430 ReplacedAll = false; 431 continue; 432 } 433 Users.push_back(User); 434 } 435 436 for (auto *U : Users) 437 U->replaceUsesOfWith(From, To); 438 439 if (ReplacedAll) 440 if (auto *I = dyn_cast<Instruction>(From)) 441 InstsToRemove.insert(I); 442 } 443 444 void IRPromoter::PrepareWrappingAdds() { 445 LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); 446 IRBuilder<> Builder{Ctx}; 447 448 // For adds that safely wrap and use a negative immediate as operand 1, we 449 // create an equivalent instruction using a positive immediate. 450 // That positive immediate can then be zext along with all the other 451 // immediates later. 452 for (auto *I : *SafeWrap) { 453 if (I->getOpcode() != Instruction::Add) 454 continue; 455 456 LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); 457 assert((isa<ConstantInt>(I->getOperand(1)) && 458 cast<ConstantInt>(I->getOperand(1))->isNegative()) && 459 "Wrapping should have a negative immediate as the second operand"); 460 461 auto Const = cast<ConstantInt>(I->getOperand(1)); 462 auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); 463 Builder.SetInsertPoint(I); 464 Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); 465 if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { 466 NewInst->copyIRFlags(I); 467 NewInsts.insert(NewInst); 468 } 469 InstsToRemove.insert(I); 470 I->replaceAllUsesWith(NewVal); 471 LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); 472 } 473 for (auto *I : NewInsts) 474 Visited->insert(I); 475 } 476 477 void IRPromoter::ExtendSources() { 478 IRBuilder<> Builder{Ctx}; 479 480 auto InsertZExt = [&](Value *V, Instruction *InsertPt) { 481 assert(V->getType() != ExtTy && "zext already extends to i32"); 482 LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n"); 483 Builder.SetInsertPoint(InsertPt); 484 if (auto *I = dyn_cast<Instruction>(V)) 485 Builder.SetCurrentDebugLocation(I->getDebugLoc()); 486 487 Value *ZExt = Builder.CreateZExt(V, ExtTy); 488 if (auto *I = dyn_cast<Instruction>(ZExt)) { 489 if (isa<Argument>(V)) 490 I->moveBefore(InsertPt); 491 else 492 I->moveAfter(InsertPt); 493 NewInsts.insert(I); 494 } 495 496 ReplaceAllUsersOfWith(V, ZExt); 497 }; 498 499 // Now, insert extending instructions between the sources and their users. 500 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); 501 for (auto V : *Sources) { 502 LLVM_DEBUG(dbgs() << " - " << *V << "\n"); 503 if (auto *I = dyn_cast<Instruction>(V)) 504 InsertZExt(I, I); 505 else if (auto *Arg = dyn_cast<Argument>(V)) { 506 BasicBlock &BB = Arg->getParent()->front(); 507 InsertZExt(Arg, &*BB.getFirstInsertionPt()); 508 } else { 509 llvm_unreachable("unhandled source that needs extending"); 510 } 511 Promoted.insert(V); 512 } 513 } 514 515 void IRPromoter::PromoteTree() { 516 LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n"); 517 518 IRBuilder<> Builder{Ctx}; 519 520 // Mutate the types of the instructions within the tree. Here we handle 521 // constant operands. 522 for (auto *V : *Visited) { 523 if (Sources->count(V)) 524 continue; 525 526 auto *I = cast<Instruction>(V); 527 if (Sinks->count(I)) 528 continue; 529 530 for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { 531 Value *Op = I->getOperand(i); 532 if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType())) 533 continue; 534 535 if (auto *Const = dyn_cast<ConstantInt>(Op)) { 536 Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); 537 I->setOperand(i, NewConst); 538 } else if (isa<UndefValue>(Op)) 539 I->setOperand(i, UndefValue::get(ExtTy)); 540 } 541 542 if (shouldPromote(I)) { 543 I->mutateType(ExtTy); 544 Promoted.insert(I); 545 } 546 } 547 } 548 549 void IRPromoter::TruncateSinks() { 550 LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n"); 551 552 IRBuilder<> Builder{Ctx}; 553 554 auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { 555 if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType())) 556 return nullptr; 557 558 if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V)) 559 return nullptr; 560 561 LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " 562 << *V << "\n"); 563 Builder.SetInsertPoint(cast<Instruction>(V)); 564 auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy)); 565 if (Trunc) 566 NewInsts.insert(Trunc); 567 return Trunc; 568 }; 569 570 // Fix up any stores or returns that use the results of the promoted 571 // chain. 572 for (auto I : *Sinks) { 573 LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); 574 575 // Handle calls separately as we need to iterate over arg operands. 576 if (auto *Call = dyn_cast<CallInst>(I)) { 577 for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { 578 Value *Arg = Call->getArgOperand(i); 579 Type *Ty = TruncTysMap[Call][i]; 580 if (Instruction *Trunc = InsertTrunc(Arg, Ty)) { 581 Trunc->moveBefore(Call); 582 Call->setArgOperand(i, Trunc); 583 } 584 } 585 continue; 586 } 587 588 // Special case switches because we need to truncate the condition. 589 if (auto *Switch = dyn_cast<SwitchInst>(I)) { 590 Type *Ty = TruncTysMap[Switch][0]; 591 if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) { 592 Trunc->moveBefore(Switch); 593 Switch->setCondition(Trunc); 594 } 595 continue; 596 } 597 598 // Now handle the others. 599 for (unsigned i = 0; i < I->getNumOperands(); ++i) { 600 Type *Ty = TruncTysMap[I][i]; 601 if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) { 602 Trunc->moveBefore(I); 603 I->setOperand(i, Trunc); 604 } 605 } 606 } 607 } 608 609 void IRPromoter::Cleanup() { 610 LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); 611 // Some zexts will now have become redundant, along with their trunc 612 // operands, so remove them 613 for (auto V : *Visited) { 614 if (!isa<ZExtInst>(V)) 615 continue; 616 617 auto ZExt = cast<ZExtInst>(V); 618 if (ZExt->getDestTy() != ExtTy) 619 continue; 620 621 Value *Src = ZExt->getOperand(0); 622 if (ZExt->getSrcTy() == ZExt->getDestTy()) { 623 LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt 624 << "\n"); 625 ReplaceAllUsersOfWith(ZExt, Src); 626 continue; 627 } 628 629 // Unless they produce a value that is narrower than ExtTy, we can 630 // replace the result of the zext with the input of a newly inserted 631 // trunc. 632 if (NewInsts.count(Src) && isa<TruncInst>(Src) && 633 Src->getType() == OrigTy) { 634 auto *Trunc = cast<TruncInst>(Src); 635 assert(Trunc->getOperand(0)->getType() == ExtTy && 636 "expected inserted trunc to be operating on i32"); 637 ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); 638 } 639 } 640 641 for (auto *I : InstsToRemove) { 642 LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n"); 643 I->dropAllReferences(); 644 I->eraseFromParent(); 645 } 646 647 InstsToRemove.clear(); 648 NewInsts.clear(); 649 TruncTysMap.clear(); 650 Promoted.clear(); 651 SafeToPromote->clear(); 652 SafeWrap->clear(); 653 } 654 655 void IRPromoter::ConvertTruncs() { 656 LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n"); 657 IRBuilder<> Builder{Ctx}; 658 659 for (auto *V : *Visited) { 660 if (!isa<TruncInst>(V) || Sources->count(V)) 661 continue; 662 663 auto *Trunc = cast<TruncInst>(V); 664 Builder.SetInsertPoint(Trunc); 665 IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType()); 666 IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]); 667 668 unsigned NumBits = DestTy->getScalarSizeInBits(); 669 ConstantInt *Mask = 670 ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); 671 Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); 672 673 if (auto *I = dyn_cast<Instruction>(Masked)) 674 NewInsts.insert(I); 675 676 ReplaceAllUsersOfWith(Trunc, Masked); 677 } 678 } 679 680 void IRPromoter::Mutate(Type *OrigTy, unsigned PromotedWidth, 681 SetVector<Value*> &Visited, 682 SmallPtrSetImpl<Value*> &Sources, 683 SmallPtrSetImpl<Instruction*> &Sinks, 684 SmallPtrSetImpl<Instruction*> &SafeToPromote, 685 SmallPtrSetImpl<Instruction*> &SafeWrap) { 686 LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " 687 << TypePromotion::TypeSize << " to " << PromotedWidth 688 << "-bits\n"); 689 690 assert(isa<IntegerType>(OrigTy) && "expected integer type"); 691 this->OrigTy = cast<IntegerType>(OrigTy); 692 ExtTy = IntegerType::get(Ctx, PromotedWidth); 693 assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() && 694 "original type not smaller than extended type"); 695 696 this->Visited = &Visited; 697 this->Sources = &Sources; 698 this->Sinks = &Sinks; 699 this->SafeToPromote = &SafeToPromote; 700 this->SafeWrap = &SafeWrap; 701 702 // Cache original types of the values that will likely need truncating 703 for (auto *I : Sinks) { 704 if (auto *Call = dyn_cast<CallInst>(I)) { 705 for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { 706 Value *Arg = Call->getArgOperand(i); 707 TruncTysMap[Call].push_back(Arg->getType()); 708 } 709 } else if (auto *Switch = dyn_cast<SwitchInst>(I)) 710 TruncTysMap[I].push_back(Switch->getCondition()->getType()); 711 else { 712 for (unsigned i = 0; i < I->getNumOperands(); ++i) 713 TruncTysMap[I].push_back(I->getOperand(i)->getType()); 714 } 715 } 716 for (auto *V : Visited) { 717 if (!isa<TruncInst>(V) || Sources.count(V)) 718 continue; 719 auto *Trunc = cast<TruncInst>(V); 720 TruncTysMap[Trunc].push_back(Trunc->getDestTy()); 721 } 722 723 // Convert adds using negative immediates to equivalent instructions that use 724 // positive constants. 725 PrepareWrappingAdds(); 726 727 // Insert zext instructions between sources and their users. 728 ExtendSources(); 729 730 // Promote visited instructions, mutating their types in place. 731 PromoteTree(); 732 733 // Convert any truncs, that aren't sources, into AND masks. 734 ConvertTruncs(); 735 736 // Insert trunc instructions for use by calls, stores etc... 737 TruncateSinks(); 738 739 // Finally, remove unecessary zexts and truncs, delete old instructions and 740 // clear the data structures. 741 Cleanup(); 742 743 LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n"); 744 } 745 746 /// We accept most instructions, as well as Arguments and ConstantInsts. We 747 /// Disallow casts other than zext and truncs and only allow calls if their 748 /// return value is zeroext. We don't allow opcodes that can introduce sign 749 /// bits. 750 bool TypePromotion::isSupportedValue(Value *V) { 751 if (auto *I = dyn_cast<Instruction>(V)) { 752 switch (I->getOpcode()) { 753 default: 754 return isa<BinaryOperator>(I) && isSupportedType(I) && 755 !GenerateSignBits(I); 756 case Instruction::GetElementPtr: 757 case Instruction::Store: 758 case Instruction::Br: 759 case Instruction::Switch: 760 return true; 761 case Instruction::PHI: 762 case Instruction::Select: 763 case Instruction::Ret: 764 case Instruction::Load: 765 case Instruction::Trunc: 766 case Instruction::BitCast: 767 return isSupportedType(I); 768 case Instruction::ZExt: 769 return isSupportedType(I->getOperand(0)); 770 case Instruction::ICmp: 771 // Now that we allow small types than TypeSize, only allow icmp of 772 // TypeSize because they will require a trunc to be legalised. 773 // TODO: Allow icmp of smaller types, and calculate at the end 774 // whether the transform would be beneficial. 775 if (isa<PointerType>(I->getOperand(0)->getType())) 776 return true; 777 return EqualTypeSize(I->getOperand(0)); 778 case Instruction::Call: { 779 // Special cases for calls as we need to check for zeroext 780 // TODO We should accept calls even if they don't have zeroext, as they 781 // can still be sinks. 782 auto *Call = cast<CallInst>(I); 783 return isSupportedType(Call) && 784 Call->hasRetAttr(Attribute::AttrKind::ZExt); 785 } 786 } 787 } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) { 788 return isSupportedType(V); 789 } else if (isa<Argument>(V)) 790 return isSupportedType(V); 791 792 return isa<BasicBlock>(V); 793 } 794 795 /// Check that the type of V would be promoted and that the original type is 796 /// smaller than the targeted promoted type. Check that we're not trying to 797 /// promote something larger than our base 'TypeSize' type. 798 bool TypePromotion::isLegalToPromote(Value *V) { 799 800 auto *I = dyn_cast<Instruction>(V); 801 if (!I) 802 return true; 803 804 if (SafeToPromote.count(I)) 805 return true; 806 807 if (isPromotedResultSafe(V) || isSafeWrap(I)) { 808 SafeToPromote.insert(I); 809 return true; 810 } 811 return false; 812 } 813 814 bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { 815 OrigTy = V->getType(); 816 TypeSize = OrigTy->getPrimitiveSizeInBits(); 817 SafeToPromote.clear(); 818 SafeWrap.clear(); 819 820 if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) 821 return false; 822 823 LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " 824 << TypeSize << " bits to " << PromotedWidth << "\n"); 825 826 SetVector<Value*> WorkList; 827 SmallPtrSet<Value*, 8> Sources; 828 SmallPtrSet<Instruction*, 4> Sinks; 829 SetVector<Value*> CurrentVisited; 830 WorkList.insert(V); 831 832 // Return true if V was added to the worklist as a supported instruction, 833 // if it was already visited, or if we don't need to explore it (e.g. 834 // pointer values and GEPs), and false otherwise. 835 auto AddLegalInst = [&](Value *V) { 836 if (CurrentVisited.count(V)) 837 return true; 838 839 // Ignore GEPs because they don't need promoting and the constant indices 840 // will prevent the transformation. 841 if (isa<GetElementPtrInst>(V)) 842 return true; 843 844 if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { 845 LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n"); 846 return false; 847 } 848 849 WorkList.insert(V); 850 return true; 851 }; 852 853 // Iterate through, and add to, a tree of operands and users in the use-def. 854 while (!WorkList.empty()) { 855 Value *V = WorkList.back(); 856 WorkList.pop_back(); 857 if (CurrentVisited.count(V)) 858 continue; 859 860 // Ignore non-instructions, other than arguments. 861 if (!isa<Instruction>(V) && !isSource(V)) 862 continue; 863 864 // If we've already visited this value from somewhere, bail now because 865 // the tree has already been explored. 866 // TODO: This could limit the transform, ie if we try to promote something 867 // from an i8 and fail first, before trying an i16. 868 if (AllVisited.count(V)) 869 return false; 870 871 CurrentVisited.insert(V); 872 AllVisited.insert(V); 873 874 // Calls can be both sources and sinks. 875 if (isSink(V)) 876 Sinks.insert(cast<Instruction>(V)); 877 878 if (isSource(V)) 879 Sources.insert(V); 880 881 if (!isSink(V) && !isSource(V)) { 882 if (auto *I = dyn_cast<Instruction>(V)) { 883 // Visit operands of any instruction visited. 884 for (auto &U : I->operands()) { 885 if (!AddLegalInst(U)) 886 return false; 887 } 888 } 889 } 890 891 // Don't visit users of a node which isn't going to be mutated unless its a 892 // source. 893 if (isSource(V) || shouldPromote(V)) { 894 for (Use &U : V->uses()) { 895 if (!AddLegalInst(U.getUser())) 896 return false; 897 } 898 } 899 } 900 901 LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; 902 for (auto *I : CurrentVisited) 903 I->dump(); 904 ); 905 unsigned ToPromote = 0; 906 for (auto *V : CurrentVisited) { 907 if (Sources.count(V)) 908 continue; 909 if (Sinks.count(cast<Instruction>(V))) 910 continue; 911 ++ToPromote; 912 } 913 914 if (ToPromote < 2) 915 return false; 916 917 Promoter->Mutate(OrigTy, PromotedWidth, CurrentVisited, Sources, Sinks, 918 SafeToPromote, SafeWrap); 919 return true; 920 } 921 922 bool TypePromotion::doInitialization(Module &M) { 923 Promoter = new IRPromoter(&M); 924 return false; 925 } 926 927 bool TypePromotion::runOnFunction(Function &F) { 928 if (skipFunction(F) || DisablePromotion) 929 return false; 930 931 LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n"); 932 933 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 934 if (!TPC) 935 return false; 936 937 bool MadeChange = false; 938 const DataLayout &DL = F.getParent()->getDataLayout(); 939 const TargetMachine &TM = TPC->getTM<TargetMachine>(); 940 const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); 941 const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); 942 const TargetTransformInfo &TII = 943 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 944 945 // Search up from icmps to try to promote their operands. 946 for (BasicBlock &BB : F) { 947 auto &Insts = BB.getInstList(); 948 for (auto &I : Insts) { 949 if (AllVisited.count(&I)) 950 continue; 951 952 if (!isa<ICmpInst>(&I)) 953 continue; 954 955 auto *ICmp = cast<ICmpInst>(&I); 956 // Skip signed or pointer compares 957 if (ICmp->isSigned() || 958 !isa<IntegerType>(ICmp->getOperand(0)->getType())) 959 continue; 960 961 LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); 962 963 for (auto &Op : ICmp->operands()) { 964 if (auto *I = dyn_cast<Instruction>(Op)) { 965 EVT SrcVT = TLI->getValueType(DL, I->getType()); 966 if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) 967 break; 968 969 if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != 970 TargetLowering::TypePromoteInteger) 971 break; 972 973 EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); 974 if (TII.getRegisterBitWidth(false) < PromotedVT.getSizeInBits()) { 975 LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " 976 << "for promoted type\n"); 977 break; 978 } 979 980 MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); 981 break; 982 } 983 } 984 } 985 LLVM_DEBUG(if (verifyFunction(F, &dbgs())) { 986 dbgs() << F; 987 report_fatal_error("Broken function after type promotion"); 988 }); 989 } 990 if (MadeChange) 991 LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); 992 993 return MadeChange; 994 } 995 996 bool TypePromotion::doFinalization(Module &M) { 997 delete Promoter; 998 return false; 999 } 1000 1001 INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) 1002 INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) 1003 1004 char TypePromotion::ID = 0; 1005 unsigned TypePromotion::TypeSize = 0; 1006 1007 FunctionPass *llvm::createTypePromotionPass() { 1008 return new TypePromotion(); 1009 } 1010