1 //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass: 11 // (1) tries to remove compares if CC already contains the required information 12 // (2) fuses compares and branches into COMPARE AND BRANCH instructions 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "SystemZTargetMachine.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/CodeGen/MachineFunctionPass.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/Support/MathExtras.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/Target/TargetMachine.h" 24 #include "llvm/Target/TargetRegisterInfo.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "systemz-elim-compare" 29 30 STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); 31 STATISTIC(LoadAndTraps, "Number of load-and-trap instructions"); 32 STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); 33 STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); 34 35 namespace { 36 // Represents the references to a particular register in one or more 37 // instructions. 38 struct Reference { 39 Reference() 40 : Def(false), Use(false) {} 41 42 Reference &operator|=(const Reference &Other) { 43 Def |= Other.Def; 44 Use |= Other.Use; 45 return *this; 46 } 47 48 explicit operator bool() const { return Def || Use; } 49 50 // True if the register is defined or used in some form, either directly or 51 // via a sub- or super-register. 52 bool Def; 53 bool Use; 54 }; 55 56 class SystemZElimCompare : public MachineFunctionPass { 57 public: 58 static char ID; 59 SystemZElimCompare(const SystemZTargetMachine &tm) 60 : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {} 61 62 StringRef getPassName() const override { 63 return "SystemZ Comparison Elimination"; 64 } 65 66 bool processBlock(MachineBasicBlock &MBB); 67 bool runOnMachineFunction(MachineFunction &F) override; 68 MachineFunctionProperties getRequiredProperties() const override { 69 return MachineFunctionProperties().set( 70 MachineFunctionProperties::Property::NoVRegs); 71 } 72 73 private: 74 Reference getRegReferences(MachineInstr &MI, unsigned Reg); 75 bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, 76 SmallVectorImpl<MachineInstr *> &CCUsers); 77 bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, 78 SmallVectorImpl<MachineInstr *> &CCUsers); 79 bool convertToLoadAndTest(MachineInstr &MI); 80 bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, 81 SmallVectorImpl<MachineInstr *> &CCUsers); 82 bool optimizeCompareZero(MachineInstr &Compare, 83 SmallVectorImpl<MachineInstr *> &CCUsers); 84 bool fuseCompareOperations(MachineInstr &Compare, 85 SmallVectorImpl<MachineInstr *> &CCUsers); 86 87 const SystemZInstrInfo *TII; 88 const TargetRegisterInfo *TRI; 89 }; 90 91 char SystemZElimCompare::ID = 0; 92 } // end anonymous namespace 93 94 FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { 95 return new SystemZElimCompare(TM); 96 } 97 98 // Return true if CC is live out of MBB. 99 static bool isCCLiveOut(MachineBasicBlock &MBB) { 100 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) 101 if ((*SI)->isLiveIn(SystemZ::CC)) 102 return true; 103 return false; 104 } 105 106 // Return true if any CC result of MI would reflect the value of Reg. 107 static bool resultTests(MachineInstr &MI, unsigned Reg) { 108 if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && 109 MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) 110 return true; 111 112 switch (MI.getOpcode()) { 113 case SystemZ::LR: 114 case SystemZ::LGR: 115 case SystemZ::LGFR: 116 case SystemZ::LTR: 117 case SystemZ::LTGR: 118 case SystemZ::LTGFR: 119 case SystemZ::LER: 120 case SystemZ::LDR: 121 case SystemZ::LXR: 122 case SystemZ::LTEBR: 123 case SystemZ::LTDBR: 124 case SystemZ::LTXBR: 125 if (MI.getOperand(1).getReg() == Reg) 126 return true; 127 } 128 129 return false; 130 } 131 132 // Describe the references to Reg or any of its aliases in MI. 133 Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { 134 Reference Ref; 135 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { 136 const MachineOperand &MO = MI.getOperand(I); 137 if (MO.isReg()) { 138 if (unsigned MOReg = MO.getReg()) { 139 if (TRI->regsOverlap(MOReg, Reg)) { 140 if (MO.isUse()) 141 Ref.Use = true; 142 else if (MO.isDef()) 143 Ref.Def = true; 144 } 145 } 146 } 147 } 148 return Ref; 149 } 150 151 // Return true if this is a load and test which can be optimized the 152 // same way as compare instruction. 153 static bool isLoadAndTestAsCmp(MachineInstr &MI) { 154 // If we during isel used a load-and-test as a compare with 0, the 155 // def operand is dead. 156 return (MI.getOpcode() == SystemZ::LTEBR || 157 MI.getOpcode() == SystemZ::LTDBR || 158 MI.getOpcode() == SystemZ::LTXBR) && 159 MI.getOperand(0).isDead(); 160 } 161 162 // Return the source register of Compare, which is the unknown value 163 // being tested. 164 static unsigned getCompareSourceReg(MachineInstr &Compare) { 165 unsigned reg = 0; 166 if (Compare.isCompare()) 167 reg = Compare.getOperand(0).getReg(); 168 else if (isLoadAndTestAsCmp(Compare)) 169 reg = Compare.getOperand(1).getReg(); 170 assert (reg); 171 172 return reg; 173 } 174 175 // Compare compares the result of MI against zero. If MI is an addition 176 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition 177 // and convert the branch to a BRCT(G) or BRCTH. Return true on success. 178 bool SystemZElimCompare::convertToBRCT( 179 MachineInstr &MI, MachineInstr &Compare, 180 SmallVectorImpl<MachineInstr *> &CCUsers) { 181 // Check whether we have an addition of -1. 182 unsigned Opcode = MI.getOpcode(); 183 unsigned BRCT; 184 if (Opcode == SystemZ::AHI) 185 BRCT = SystemZ::BRCT; 186 else if (Opcode == SystemZ::AGHI) 187 BRCT = SystemZ::BRCTG; 188 else if (Opcode == SystemZ::AIH) 189 BRCT = SystemZ::BRCTH; 190 else 191 return false; 192 if (MI.getOperand(2).getImm() != -1) 193 return false; 194 195 // Check whether we have a single JLH. 196 if (CCUsers.size() != 1) 197 return false; 198 MachineInstr *Branch = CCUsers[0]; 199 if (Branch->getOpcode() != SystemZ::BRC || 200 Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || 201 Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) 202 return false; 203 204 // We already know that there are no references to the register between 205 // MI and Compare. Make sure that there are also no references between 206 // Compare and Branch. 207 unsigned SrcReg = getCompareSourceReg(Compare); 208 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 209 for (++MBBI; MBBI != MBBE; ++MBBI) 210 if (getRegReferences(*MBBI, SrcReg)) 211 return false; 212 213 // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. 214 MachineOperand Target(Branch->getOperand(2)); 215 while (Branch->getNumOperands()) 216 Branch->RemoveOperand(0); 217 Branch->setDesc(TII->get(BRCT)); 218 MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); 219 MIB.addOperand(MI.getOperand(0)) 220 .addOperand(MI.getOperand(1)) 221 .addOperand(Target); 222 // Add a CC def to BRCT(G), since we may have to split them again if the 223 // branch displacement overflows. BRCTH has a 32-bit displacement, so 224 // this is not necessary there. 225 if (BRCT != SystemZ::BRCTH) 226 MIB.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); 227 MI.eraseFromParent(); 228 return true; 229 } 230 231 // Compare compares the result of MI against zero. If MI is a suitable load 232 // instruction and if CCUsers is a single conditional trap on zero, eliminate 233 // the load and convert the branch to a load-and-trap. Return true on success. 234 bool SystemZElimCompare::convertToLoadAndTrap( 235 MachineInstr &MI, MachineInstr &Compare, 236 SmallVectorImpl<MachineInstr *> &CCUsers) { 237 unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode()); 238 if (!LATOpcode) 239 return false; 240 241 // Check whether we have a single CondTrap that traps on zero. 242 if (CCUsers.size() != 1) 243 return false; 244 MachineInstr *Branch = CCUsers[0]; 245 if (Branch->getOpcode() != SystemZ::CondTrap || 246 Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || 247 Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ) 248 return false; 249 250 // We already know that there are no references to the register between 251 // MI and Compare. Make sure that there are also no references between 252 // Compare and Branch. 253 unsigned SrcReg = getCompareSourceReg(Compare); 254 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 255 for (++MBBI; MBBI != MBBE; ++MBBI) 256 if (getRegReferences(*MBBI, SrcReg)) 257 return false; 258 259 // The transformation is OK. Rebuild Branch as a load-and-trap. 260 while (Branch->getNumOperands()) 261 Branch->RemoveOperand(0); 262 Branch->setDesc(TII->get(LATOpcode)); 263 MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) 264 .addOperand(MI.getOperand(0)) 265 .addOperand(MI.getOperand(1)) 266 .addOperand(MI.getOperand(2)) 267 .addOperand(MI.getOperand(3)); 268 MI.eraseFromParent(); 269 return true; 270 } 271 272 // If MI is a load instruction, try to convert it into a LOAD AND TEST. 273 // Return true on success. 274 bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) { 275 unsigned Opcode = TII->getLoadAndTest(MI.getOpcode()); 276 if (!Opcode) 277 return false; 278 279 MI.setDesc(TII->get(Opcode)); 280 MachineInstrBuilder(*MI.getParent()->getParent(), MI) 281 .addReg(SystemZ::CC, RegState::ImplicitDefine); 282 return true; 283 } 284 285 // The CC users in CCUsers are testing the result of a comparison of some 286 // value X against zero and we know that any CC value produced by MI 287 // would also reflect the value of X. Try to adjust CCUsers so that 288 // they test the result of MI directly, returning true on success. 289 // Leave everything unchanged on failure. 290 bool SystemZElimCompare::adjustCCMasksForInstr( 291 MachineInstr &MI, MachineInstr &Compare, 292 SmallVectorImpl<MachineInstr *> &CCUsers) { 293 int Opcode = MI.getOpcode(); 294 const MCInstrDesc &Desc = TII->get(Opcode); 295 unsigned MIFlags = Desc.TSFlags; 296 297 // See which compare-style condition codes are available. 298 unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); 299 300 // For unsigned comparisons with zero, only equality makes sense. 301 unsigned CompareFlags = Compare.getDesc().TSFlags; 302 if (CompareFlags & SystemZII::IsLogical) 303 ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; 304 305 if (ReusableCCMask == 0) 306 return false; 307 308 unsigned CCValues = SystemZII::getCCValues(MIFlags); 309 assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); 310 311 // Now check whether these flags are enough for all users. 312 SmallVector<MachineOperand *, 4> AlterMasks; 313 for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { 314 MachineInstr *MI = CCUsers[I]; 315 316 // Fail if this isn't a use of CC that we understand. 317 unsigned Flags = MI->getDesc().TSFlags; 318 unsigned FirstOpNum; 319 if (Flags & SystemZII::CCMaskFirst) 320 FirstOpNum = 0; 321 else if (Flags & SystemZII::CCMaskLast) 322 FirstOpNum = MI->getNumExplicitOperands() - 2; 323 else 324 return false; 325 326 // Check whether the instruction predicate treats all CC values 327 // outside of ReusableCCMask in the same way. In that case it 328 // doesn't matter what those CC values mean. 329 unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); 330 unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); 331 unsigned OutValid = ~ReusableCCMask & CCValid; 332 unsigned OutMask = ~ReusableCCMask & CCMask; 333 if (OutMask != 0 && OutMask != OutValid) 334 return false; 335 336 AlterMasks.push_back(&MI->getOperand(FirstOpNum)); 337 AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); 338 } 339 340 // All users are OK. Adjust the masks for MI. 341 for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { 342 AlterMasks[I]->setImm(CCValues); 343 unsigned CCMask = AlterMasks[I + 1]->getImm(); 344 if (CCMask & ~ReusableCCMask) 345 AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | 346 (CCValues & ~ReusableCCMask)); 347 } 348 349 // CC is now live after MI. 350 int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); 351 assert(CCDef >= 0 && "Couldn't find CC set"); 352 MI.getOperand(CCDef).setIsDead(false); 353 354 // Clear any intervening kills of CC. 355 MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; 356 for (++MBBI; MBBI != MBBE; ++MBBI) 357 MBBI->clearRegisterKills(SystemZ::CC, TRI); 358 359 return true; 360 } 361 362 // Return true if Compare is a comparison against zero. 363 static bool isCompareZero(MachineInstr &Compare) { 364 switch (Compare.getOpcode()) { 365 case SystemZ::LTEBRCompare: 366 case SystemZ::LTDBRCompare: 367 case SystemZ::LTXBRCompare: 368 return true; 369 370 default: 371 372 if (isLoadAndTestAsCmp(Compare)) 373 return true; 374 375 return Compare.getNumExplicitOperands() == 2 && 376 Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0; 377 } 378 } 379 380 // Try to optimize cases where comparison instruction Compare is testing 381 // a value against zero. Return true on success and if Compare should be 382 // deleted as dead. CCUsers is the list of instructions that use the CC 383 // value produced by Compare. 384 bool SystemZElimCompare::optimizeCompareZero( 385 MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { 386 if (!isCompareZero(Compare)) 387 return false; 388 389 // Search back for CC results that are based on the first operand. 390 unsigned SrcReg = getCompareSourceReg(Compare); 391 MachineBasicBlock &MBB = *Compare.getParent(); 392 MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); 393 Reference CCRefs; 394 Reference SrcRefs; 395 while (MBBI != MBBE) { 396 --MBBI; 397 MachineInstr &MI = *MBBI; 398 if (resultTests(MI, SrcReg)) { 399 // Try to remove both MI and Compare by converting a branch to BRCT(G). 400 // or a load-and-trap instruction. We don't care in this case whether 401 // CC is modified between MI and Compare. 402 if (!CCRefs.Use && !SrcRefs) { 403 if (convertToBRCT(MI, Compare, CCUsers)) { 404 BranchOnCounts += 1; 405 return true; 406 } 407 if (convertToLoadAndTrap(MI, Compare, CCUsers)) { 408 LoadAndTraps += 1; 409 return true; 410 } 411 } 412 // Try to eliminate Compare by reusing a CC result from MI. 413 if ((!CCRefs && convertToLoadAndTest(MI)) || 414 (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { 415 EliminatedComparisons += 1; 416 return true; 417 } 418 } 419 SrcRefs |= getRegReferences(MI, SrcReg); 420 if (SrcRefs.Def) 421 return false; 422 CCRefs |= getRegReferences(MI, SystemZ::CC); 423 if (CCRefs.Use && CCRefs.Def) 424 return false; 425 } 426 return false; 427 } 428 429 // Try to fuse comparison instruction Compare into a later branch. 430 // Return true on success and if Compare is therefore redundant. 431 bool SystemZElimCompare::fuseCompareOperations( 432 MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { 433 // See whether we have a single branch with which to fuse. 434 if (CCUsers.size() != 1) 435 return false; 436 MachineInstr *Branch = CCUsers[0]; 437 SystemZII::FusedCompareType Type; 438 switch (Branch->getOpcode()) { 439 case SystemZ::BRC: 440 Type = SystemZII::CompareAndBranch; 441 break; 442 case SystemZ::CondReturn: 443 Type = SystemZII::CompareAndReturn; 444 break; 445 case SystemZ::CallBCR: 446 Type = SystemZII::CompareAndSibcall; 447 break; 448 case SystemZ::CondTrap: 449 Type = SystemZII::CompareAndTrap; 450 break; 451 default: 452 return false; 453 } 454 455 // See whether we have a comparison that can be fused. 456 unsigned FusedOpcode = 457 TII->getFusedCompare(Compare.getOpcode(), Type, &Compare); 458 if (!FusedOpcode) 459 return false; 460 461 // Make sure that the operands are available at the branch. 462 // SrcReg2 is the register if the source operand is a register, 463 // 0 if the source operand is immediate, and the base register 464 // if the source operand is memory (index is not supported). 465 unsigned SrcReg = Compare.getOperand(0).getReg(); 466 unsigned SrcReg2 = 467 Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0; 468 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 469 for (++MBBI; MBBI != MBBE; ++MBBI) 470 if (MBBI->modifiesRegister(SrcReg, TRI) || 471 (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) 472 return false; 473 474 // Read the branch mask, target (if applicable), regmask (if applicable). 475 MachineOperand CCMask(MBBI->getOperand(1)); 476 assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && 477 "Invalid condition-code mask for integer comparison"); 478 // This is only valid for CompareAndBranch. 479 MachineOperand Target(MBBI->getOperand( 480 Type == SystemZII::CompareAndBranch ? 2 : 0)); 481 const uint32_t *RegMask; 482 if (Type == SystemZII::CompareAndSibcall) 483 RegMask = MBBI->getOperand(2).getRegMask(); 484 485 // Clear out all current operands. 486 int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); 487 assert(CCUse >= 0 && "BRC/BCR must use CC"); 488 Branch->RemoveOperand(CCUse); 489 // Remove target (branch) or regmask (sibcall). 490 if (Type == SystemZII::CompareAndBranch || 491 Type == SystemZII::CompareAndSibcall) 492 Branch->RemoveOperand(2); 493 Branch->RemoveOperand(1); 494 Branch->RemoveOperand(0); 495 496 // Rebuild Branch as a fused compare and branch. 497 // SrcNOps is the number of MI operands of the compare instruction 498 // that we need to copy over. 499 unsigned SrcNOps = 2; 500 if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) 501 SrcNOps = 3; 502 Branch->setDesc(TII->get(FusedOpcode)); 503 MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); 504 for (unsigned I = 0; I < SrcNOps; I++) 505 MIB.addOperand(Compare.getOperand(I)); 506 MIB.addOperand(CCMask); 507 508 if (Type == SystemZII::CompareAndBranch) { 509 // Only conditional branches define CC, as they may be converted back 510 // to a non-fused branch because of a long displacement. Conditional 511 // returns don't have that problem. 512 MIB.addOperand(Target) 513 .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); 514 } 515 516 if (Type == SystemZII::CompareAndSibcall) 517 MIB.addRegMask(RegMask); 518 519 // Clear any intervening kills of SrcReg and SrcReg2. 520 MBBI = Compare; 521 for (++MBBI; MBBI != MBBE; ++MBBI) { 522 MBBI->clearRegisterKills(SrcReg, TRI); 523 if (SrcReg2) 524 MBBI->clearRegisterKills(SrcReg2, TRI); 525 } 526 FusedComparisons += 1; 527 return true; 528 } 529 530 // Process all comparison instructions in MBB. Return true if something 531 // changed. 532 bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { 533 bool Changed = false; 534 535 // Walk backwards through the block looking for comparisons, recording 536 // all CC users as we go. The subroutines can delete Compare and 537 // instructions before it. 538 bool CompleteCCUsers = !isCCLiveOut(MBB); 539 SmallVector<MachineInstr *, 4> CCUsers; 540 MachineBasicBlock::iterator MBBI = MBB.end(); 541 while (MBBI != MBB.begin()) { 542 MachineInstr &MI = *--MBBI; 543 if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && 544 (optimizeCompareZero(MI, CCUsers) || 545 fuseCompareOperations(MI, CCUsers))) { 546 ++MBBI; 547 MI.eraseFromParent(); 548 Changed = true; 549 CCUsers.clear(); 550 continue; 551 } 552 553 if (MI.definesRegister(SystemZ::CC)) { 554 CCUsers.clear(); 555 CompleteCCUsers = true; 556 } 557 if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers) 558 CCUsers.push_back(&MI); 559 } 560 return Changed; 561 } 562 563 bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { 564 if (skipFunction(*F.getFunction())) 565 return false; 566 567 TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); 568 TRI = &TII->getRegisterInfo(); 569 570 bool Changed = false; 571 for (auto &MBB : F) 572 Changed |= processBlock(MBB); 573 574 return Changed; 575 } 576