1 //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass: 11 // (1) tries to remove compares if CC already contains the required information 12 // (2) fuses compares and branches into COMPARE AND BRANCH instructions 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "SystemZTargetMachine.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/CodeGen/MachineFunctionPass.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/Support/MathExtras.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/Target/TargetMachine.h" 24 #include "llvm/Target/TargetRegisterInfo.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "systemz-elim-compare" 29 30 STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); 31 STATISTIC(LoadAndTraps, "Number of load-and-trap instructions"); 32 STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); 33 STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); 34 35 namespace { 36 // Represents the references to a particular register in one or more 37 // instructions. 38 struct Reference { 39 Reference() 40 : Def(false), Use(false) {} 41 42 Reference &operator|=(const Reference &Other) { 43 Def |= Other.Def; 44 Use |= Other.Use; 45 return *this; 46 } 47 48 explicit operator bool() const { return Def || Use; } 49 50 // True if the register is defined or used in some form, either directly or 51 // via a sub- or super-register. 52 bool Def; 53 bool Use; 54 }; 55 56 class SystemZElimCompare : public MachineFunctionPass { 57 public: 58 static char ID; 59 SystemZElimCompare(const SystemZTargetMachine &tm) 60 : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {} 61 62 StringRef getPassName() const override { 63 return "SystemZ Comparison Elimination"; 64 } 65 66 bool processBlock(MachineBasicBlock &MBB); 67 bool runOnMachineFunction(MachineFunction &F) override; 68 MachineFunctionProperties getRequiredProperties() const override { 69 return MachineFunctionProperties().set( 70 MachineFunctionProperties::Property::NoVRegs); 71 } 72 73 private: 74 Reference getRegReferences(MachineInstr &MI, unsigned Reg); 75 bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, 76 SmallVectorImpl<MachineInstr *> &CCUsers); 77 bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, 78 SmallVectorImpl<MachineInstr *> &CCUsers); 79 bool convertToLoadAndTest(MachineInstr &MI); 80 bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, 81 SmallVectorImpl<MachineInstr *> &CCUsers); 82 bool optimizeCompareZero(MachineInstr &Compare, 83 SmallVectorImpl<MachineInstr *> &CCUsers); 84 bool fuseCompareOperations(MachineInstr &Compare, 85 SmallVectorImpl<MachineInstr *> &CCUsers); 86 87 const SystemZInstrInfo *TII; 88 const TargetRegisterInfo *TRI; 89 }; 90 91 char SystemZElimCompare::ID = 0; 92 } // end anonymous namespace 93 94 FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { 95 return new SystemZElimCompare(TM); 96 } 97 98 // Return true if CC is live out of MBB. 99 static bool isCCLiveOut(MachineBasicBlock &MBB) { 100 for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) 101 if ((*SI)->isLiveIn(SystemZ::CC)) 102 return true; 103 return false; 104 } 105 106 // Return true if any CC result of MI would reflect the value of Reg. 107 static bool resultTests(MachineInstr &MI, unsigned Reg) { 108 if (MI.getNumOperands() > 0 && MI.getOperand(0).isReg() && 109 MI.getOperand(0).isDef() && MI.getOperand(0).getReg() == Reg) 110 return true; 111 112 switch (MI.getOpcode()) { 113 case SystemZ::LR: 114 case SystemZ::LGR: 115 case SystemZ::LGFR: 116 case SystemZ::LTR: 117 case SystemZ::LTGR: 118 case SystemZ::LTGFR: 119 case SystemZ::LER: 120 case SystemZ::LDR: 121 case SystemZ::LXR: 122 case SystemZ::LTEBR: 123 case SystemZ::LTDBR: 124 case SystemZ::LTXBR: 125 if (MI.getOperand(1).getReg() == Reg) 126 return true; 127 } 128 129 return false; 130 } 131 132 // Describe the references to Reg or any of its aliases in MI. 133 Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { 134 Reference Ref; 135 for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { 136 const MachineOperand &MO = MI.getOperand(I); 137 if (MO.isReg()) { 138 if (unsigned MOReg = MO.getReg()) { 139 if (TRI->regsOverlap(MOReg, Reg)) { 140 if (MO.isUse()) 141 Ref.Use = true; 142 else if (MO.isDef()) 143 Ref.Def = true; 144 } 145 } 146 } 147 } 148 return Ref; 149 } 150 151 // Return true if this is a load and test which can be optimized the 152 // same way as compare instruction. 153 static bool isLoadAndTestAsCmp(MachineInstr &MI) { 154 // If we during isel used a load-and-test as a compare with 0, the 155 // def operand is dead. 156 return (MI.getOpcode() == SystemZ::LTEBR || 157 MI.getOpcode() == SystemZ::LTDBR || 158 MI.getOpcode() == SystemZ::LTXBR) && 159 MI.getOperand(0).isDead(); 160 } 161 162 // Return the source register of Compare, which is the unknown value 163 // being tested. 164 static unsigned getCompareSourceReg(MachineInstr &Compare) { 165 unsigned reg = 0; 166 if (Compare.isCompare()) 167 reg = Compare.getOperand(0).getReg(); 168 else if (isLoadAndTestAsCmp(Compare)) 169 reg = Compare.getOperand(1).getReg(); 170 assert (reg); 171 172 return reg; 173 } 174 175 // Compare compares the result of MI against zero. If MI is an addition 176 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition 177 // and convert the branch to a BRCT(G) or BRCTH. Return true on success. 178 bool SystemZElimCompare::convertToBRCT( 179 MachineInstr &MI, MachineInstr &Compare, 180 SmallVectorImpl<MachineInstr *> &CCUsers) { 181 // Check whether we have an addition of -1. 182 unsigned Opcode = MI.getOpcode(); 183 unsigned BRCT; 184 if (Opcode == SystemZ::AHI) 185 BRCT = SystemZ::BRCT; 186 else if (Opcode == SystemZ::AGHI) 187 BRCT = SystemZ::BRCTG; 188 else if (Opcode == SystemZ::AIH) 189 BRCT = SystemZ::BRCTH; 190 else 191 return false; 192 if (MI.getOperand(2).getImm() != -1) 193 return false; 194 195 // Check whether we have a single JLH. 196 if (CCUsers.size() != 1) 197 return false; 198 MachineInstr *Branch = CCUsers[0]; 199 if (Branch->getOpcode() != SystemZ::BRC || 200 Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || 201 Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) 202 return false; 203 204 // We already know that there are no references to the register between 205 // MI and Compare. Make sure that there are also no references between 206 // Compare and Branch. 207 unsigned SrcReg = getCompareSourceReg(Compare); 208 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 209 for (++MBBI; MBBI != MBBE; ++MBBI) 210 if (getRegReferences(*MBBI, SrcReg)) 211 return false; 212 213 // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. 214 MachineOperand Target(Branch->getOperand(2)); 215 while (Branch->getNumOperands()) 216 Branch->RemoveOperand(0); 217 Branch->setDesc(TII->get(BRCT)); 218 MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); 219 MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target); 220 // Add a CC def to BRCT(G), since we may have to split them again if the 221 // branch displacement overflows. BRCTH has a 32-bit displacement, so 222 // this is not necessary there. 223 if (BRCT != SystemZ::BRCTH) 224 MIB.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead); 225 MI.eraseFromParent(); 226 return true; 227 } 228 229 // Compare compares the result of MI against zero. If MI is a suitable load 230 // instruction and if CCUsers is a single conditional trap on zero, eliminate 231 // the load and convert the branch to a load-and-trap. Return true on success. 232 bool SystemZElimCompare::convertToLoadAndTrap( 233 MachineInstr &MI, MachineInstr &Compare, 234 SmallVectorImpl<MachineInstr *> &CCUsers) { 235 unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode()); 236 if (!LATOpcode) 237 return false; 238 239 // Check whether we have a single CondTrap that traps on zero. 240 if (CCUsers.size() != 1) 241 return false; 242 MachineInstr *Branch = CCUsers[0]; 243 if (Branch->getOpcode() != SystemZ::CondTrap || 244 Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || 245 Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ) 246 return false; 247 248 // We already know that there are no references to the register between 249 // MI and Compare. Make sure that there are also no references between 250 // Compare and Branch. 251 unsigned SrcReg = getCompareSourceReg(Compare); 252 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 253 for (++MBBI; MBBI != MBBE; ++MBBI) 254 if (getRegReferences(*MBBI, SrcReg)) 255 return false; 256 257 // The transformation is OK. Rebuild Branch as a load-and-trap. 258 while (Branch->getNumOperands()) 259 Branch->RemoveOperand(0); 260 Branch->setDesc(TII->get(LATOpcode)); 261 MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) 262 .add(MI.getOperand(0)) 263 .add(MI.getOperand(1)) 264 .add(MI.getOperand(2)) 265 .add(MI.getOperand(3)); 266 MI.eraseFromParent(); 267 return true; 268 } 269 270 // If MI is a load instruction, try to convert it into a LOAD AND TEST. 271 // Return true on success. 272 bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) { 273 unsigned Opcode = TII->getLoadAndTest(MI.getOpcode()); 274 if (!Opcode) 275 return false; 276 277 MI.setDesc(TII->get(Opcode)); 278 MachineInstrBuilder(*MI.getParent()->getParent(), MI) 279 .addReg(SystemZ::CC, RegState::ImplicitDefine); 280 return true; 281 } 282 283 // The CC users in CCUsers are testing the result of a comparison of some 284 // value X against zero and we know that any CC value produced by MI 285 // would also reflect the value of X. Try to adjust CCUsers so that 286 // they test the result of MI directly, returning true on success. 287 // Leave everything unchanged on failure. 288 bool SystemZElimCompare::adjustCCMasksForInstr( 289 MachineInstr &MI, MachineInstr &Compare, 290 SmallVectorImpl<MachineInstr *> &CCUsers) { 291 int Opcode = MI.getOpcode(); 292 const MCInstrDesc &Desc = TII->get(Opcode); 293 unsigned MIFlags = Desc.TSFlags; 294 295 // See which compare-style condition codes are available. 296 unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags); 297 298 // For unsigned comparisons with zero, only equality makes sense. 299 unsigned CompareFlags = Compare.getDesc().TSFlags; 300 if (CompareFlags & SystemZII::IsLogical) 301 ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; 302 303 if (ReusableCCMask == 0) 304 return false; 305 306 unsigned CCValues = SystemZII::getCCValues(MIFlags); 307 assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); 308 309 // Now check whether these flags are enough for all users. 310 SmallVector<MachineOperand *, 4> AlterMasks; 311 for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { 312 MachineInstr *MI = CCUsers[I]; 313 314 // Fail if this isn't a use of CC that we understand. 315 unsigned Flags = MI->getDesc().TSFlags; 316 unsigned FirstOpNum; 317 if (Flags & SystemZII::CCMaskFirst) 318 FirstOpNum = 0; 319 else if (Flags & SystemZII::CCMaskLast) 320 FirstOpNum = MI->getNumExplicitOperands() - 2; 321 else 322 return false; 323 324 // Check whether the instruction predicate treats all CC values 325 // outside of ReusableCCMask in the same way. In that case it 326 // doesn't matter what those CC values mean. 327 unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); 328 unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); 329 unsigned OutValid = ~ReusableCCMask & CCValid; 330 unsigned OutMask = ~ReusableCCMask & CCMask; 331 if (OutMask != 0 && OutMask != OutValid) 332 return false; 333 334 AlterMasks.push_back(&MI->getOperand(FirstOpNum)); 335 AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); 336 } 337 338 // All users are OK. Adjust the masks for MI. 339 for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { 340 AlterMasks[I]->setImm(CCValues); 341 unsigned CCMask = AlterMasks[I + 1]->getImm(); 342 if (CCMask & ~ReusableCCMask) 343 AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | 344 (CCValues & ~ReusableCCMask)); 345 } 346 347 // CC is now live after MI. 348 int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); 349 assert(CCDef >= 0 && "Couldn't find CC set"); 350 MI.getOperand(CCDef).setIsDead(false); 351 352 // Clear any intervening kills of CC. 353 MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; 354 for (++MBBI; MBBI != MBBE; ++MBBI) 355 MBBI->clearRegisterKills(SystemZ::CC, TRI); 356 357 return true; 358 } 359 360 // Return true if Compare is a comparison against zero. 361 static bool isCompareZero(MachineInstr &Compare) { 362 switch (Compare.getOpcode()) { 363 case SystemZ::LTEBRCompare: 364 case SystemZ::LTDBRCompare: 365 case SystemZ::LTXBRCompare: 366 return true; 367 368 default: 369 370 if (isLoadAndTestAsCmp(Compare)) 371 return true; 372 373 return Compare.getNumExplicitOperands() == 2 && 374 Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0; 375 } 376 } 377 378 // Try to optimize cases where comparison instruction Compare is testing 379 // a value against zero. Return true on success and if Compare should be 380 // deleted as dead. CCUsers is the list of instructions that use the CC 381 // value produced by Compare. 382 bool SystemZElimCompare::optimizeCompareZero( 383 MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { 384 if (!isCompareZero(Compare)) 385 return false; 386 387 // Search back for CC results that are based on the first operand. 388 unsigned SrcReg = getCompareSourceReg(Compare); 389 MachineBasicBlock &MBB = *Compare.getParent(); 390 MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); 391 Reference CCRefs; 392 Reference SrcRefs; 393 while (MBBI != MBBE) { 394 --MBBI; 395 MachineInstr &MI = *MBBI; 396 if (resultTests(MI, SrcReg)) { 397 // Try to remove both MI and Compare by converting a branch to BRCT(G). 398 // or a load-and-trap instruction. We don't care in this case whether 399 // CC is modified between MI and Compare. 400 if (!CCRefs.Use && !SrcRefs) { 401 if (convertToBRCT(MI, Compare, CCUsers)) { 402 BranchOnCounts += 1; 403 return true; 404 } 405 if (convertToLoadAndTrap(MI, Compare, CCUsers)) { 406 LoadAndTraps += 1; 407 return true; 408 } 409 } 410 // Try to eliminate Compare by reusing a CC result from MI. 411 if ((!CCRefs && convertToLoadAndTest(MI)) || 412 (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { 413 EliminatedComparisons += 1; 414 return true; 415 } 416 } 417 SrcRefs |= getRegReferences(MI, SrcReg); 418 if (SrcRefs.Def) 419 return false; 420 CCRefs |= getRegReferences(MI, SystemZ::CC); 421 if (CCRefs.Use && CCRefs.Def) 422 return false; 423 } 424 return false; 425 } 426 427 // Try to fuse comparison instruction Compare into a later branch. 428 // Return true on success and if Compare is therefore redundant. 429 bool SystemZElimCompare::fuseCompareOperations( 430 MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { 431 // See whether we have a single branch with which to fuse. 432 if (CCUsers.size() != 1) 433 return false; 434 MachineInstr *Branch = CCUsers[0]; 435 SystemZII::FusedCompareType Type; 436 switch (Branch->getOpcode()) { 437 case SystemZ::BRC: 438 Type = SystemZII::CompareAndBranch; 439 break; 440 case SystemZ::CondReturn: 441 Type = SystemZII::CompareAndReturn; 442 break; 443 case SystemZ::CallBCR: 444 Type = SystemZII::CompareAndSibcall; 445 break; 446 case SystemZ::CondTrap: 447 Type = SystemZII::CompareAndTrap; 448 break; 449 default: 450 return false; 451 } 452 453 // See whether we have a comparison that can be fused. 454 unsigned FusedOpcode = 455 TII->getFusedCompare(Compare.getOpcode(), Type, &Compare); 456 if (!FusedOpcode) 457 return false; 458 459 // Make sure that the operands are available at the branch. 460 // SrcReg2 is the register if the source operand is a register, 461 // 0 if the source operand is immediate, and the base register 462 // if the source operand is memory (index is not supported). 463 unsigned SrcReg = Compare.getOperand(0).getReg(); 464 unsigned SrcReg2 = 465 Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0; 466 MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; 467 for (++MBBI; MBBI != MBBE; ++MBBI) 468 if (MBBI->modifiesRegister(SrcReg, TRI) || 469 (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) 470 return false; 471 472 // Read the branch mask, target (if applicable), regmask (if applicable). 473 MachineOperand CCMask(MBBI->getOperand(1)); 474 assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && 475 "Invalid condition-code mask for integer comparison"); 476 // This is only valid for CompareAndBranch. 477 MachineOperand Target(MBBI->getOperand( 478 Type == SystemZII::CompareAndBranch ? 2 : 0)); 479 const uint32_t *RegMask; 480 if (Type == SystemZII::CompareAndSibcall) 481 RegMask = MBBI->getOperand(2).getRegMask(); 482 483 // Clear out all current operands. 484 int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); 485 assert(CCUse >= 0 && "BRC/BCR must use CC"); 486 Branch->RemoveOperand(CCUse); 487 // Remove target (branch) or regmask (sibcall). 488 if (Type == SystemZII::CompareAndBranch || 489 Type == SystemZII::CompareAndSibcall) 490 Branch->RemoveOperand(2); 491 Branch->RemoveOperand(1); 492 Branch->RemoveOperand(0); 493 494 // Rebuild Branch as a fused compare and branch. 495 // SrcNOps is the number of MI operands of the compare instruction 496 // that we need to copy over. 497 unsigned SrcNOps = 2; 498 if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) 499 SrcNOps = 3; 500 Branch->setDesc(TII->get(FusedOpcode)); 501 MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); 502 for (unsigned I = 0; I < SrcNOps; I++) 503 MIB.add(Compare.getOperand(I)); 504 MIB.add(CCMask); 505 506 if (Type == SystemZII::CompareAndBranch) { 507 // Only conditional branches define CC, as they may be converted back 508 // to a non-fused branch because of a long displacement. Conditional 509 // returns don't have that problem. 510 MIB.add(Target).addReg(SystemZ::CC, 511 RegState::ImplicitDefine | RegState::Dead); 512 } 513 514 if (Type == SystemZII::CompareAndSibcall) 515 MIB.addRegMask(RegMask); 516 517 // Clear any intervening kills of SrcReg and SrcReg2. 518 MBBI = Compare; 519 for (++MBBI; MBBI != MBBE; ++MBBI) { 520 MBBI->clearRegisterKills(SrcReg, TRI); 521 if (SrcReg2) 522 MBBI->clearRegisterKills(SrcReg2, TRI); 523 } 524 FusedComparisons += 1; 525 return true; 526 } 527 528 // Process all comparison instructions in MBB. Return true if something 529 // changed. 530 bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { 531 bool Changed = false; 532 533 // Walk backwards through the block looking for comparisons, recording 534 // all CC users as we go. The subroutines can delete Compare and 535 // instructions before it. 536 bool CompleteCCUsers = !isCCLiveOut(MBB); 537 SmallVector<MachineInstr *, 4> CCUsers; 538 MachineBasicBlock::iterator MBBI = MBB.end(); 539 while (MBBI != MBB.begin()) { 540 MachineInstr &MI = *--MBBI; 541 if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && 542 (optimizeCompareZero(MI, CCUsers) || 543 fuseCompareOperations(MI, CCUsers))) { 544 ++MBBI; 545 MI.eraseFromParent(); 546 Changed = true; 547 CCUsers.clear(); 548 continue; 549 } 550 551 if (MI.definesRegister(SystemZ::CC)) { 552 CCUsers.clear(); 553 CompleteCCUsers = true; 554 } 555 if (MI.readsRegister(SystemZ::CC) && CompleteCCUsers) 556 CCUsers.push_back(&MI); 557 } 558 return Changed; 559 } 560 561 bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { 562 if (skipFunction(*F.getFunction())) 563 return false; 564 565 TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); 566 TRI = &TII->getRegisterInfo(); 567 568 bool Changed = false; 569 for (auto &MBB : F) 570 Changed |= processBlock(MBB); 571 572 return Changed; 573 } 574