13b0846e8STim Northover //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
23b0846e8STim Northover //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63b0846e8STim Northover //
73b0846e8STim Northover //===----------------------------------------------------------------------===//
83b0846e8STim Northover //
93b0846e8STim Northover // This file implements the AArch64ConditionalCompares pass which reduces
103b0846e8STim Northover // branching and code size by using the conditional compare instructions CCMP,
113b0846e8STim Northover // CCMN, and FCMP.
123b0846e8STim Northover //
133b0846e8STim Northover // The CFG transformations for forming conditional compares are very similar to
143b0846e8STim Northover // if-conversion, and this pass should run immediately before the early
153b0846e8STim Northover // if-conversion pass.
163b0846e8STim Northover //
173b0846e8STim Northover //===----------------------------------------------------------------------===//
183b0846e8STim Northover 
193b0846e8STim Northover #include "AArch64.h"
203b0846e8STim Northover #include "llvm/ADT/DepthFirstIterator.h"
213b0846e8STim Northover #include "llvm/ADT/SetVector.h"
223b0846e8STim Northover #include "llvm/ADT/SmallPtrSet.h"
233b0846e8STim Northover #include "llvm/ADT/Statistic.h"
240bd79f41SMatthew Simpson #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
253b0846e8STim Northover #include "llvm/CodeGen/MachineDominators.h"
263b0846e8STim Northover #include "llvm/CodeGen/MachineFunction.h"
273b0846e8STim Northover #include "llvm/CodeGen/MachineFunctionPass.h"
283b0846e8STim Northover #include "llvm/CodeGen/MachineInstrBuilder.h"
293b0846e8STim Northover #include "llvm/CodeGen/MachineLoopInfo.h"
303b0846e8STim Northover #include "llvm/CodeGen/MachineRegisterInfo.h"
313b0846e8STim Northover #include "llvm/CodeGen/MachineTraceMetrics.h"
323b0846e8STim Northover #include "llvm/CodeGen/Passes.h"
333f833edcSDavid Blaikie #include "llvm/CodeGen/TargetInstrInfo.h"
34b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetRegisterInfo.h"
35b3bde2eaSDavid Blaikie #include "llvm/CodeGen/TargetSubtargetInfo.h"
3605da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
373b0846e8STim Northover #include "llvm/Support/CommandLine.h"
383b0846e8STim Northover #include "llvm/Support/Debug.h"
393b0846e8STim Northover #include "llvm/Support/raw_ostream.h"
403b0846e8STim Northover 
413b0846e8STim Northover using namespace llvm;
423b0846e8STim Northover 
433b0846e8STim Northover #define DEBUG_TYPE "aarch64-ccmp"
443b0846e8STim Northover 
453b0846e8STim Northover // Absolute maximum number of instructions allowed per speculated block.
463b0846e8STim Northover // This bypasses all other heuristics, so it should be set fairly high.
473b0846e8STim Northover static cl::opt<unsigned> BlockInstrLimit(
483b0846e8STim Northover     "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
493b0846e8STim Northover     cl::desc("Maximum number of instructions per speculated block."));
503b0846e8STim Northover 
513b0846e8STim Northover // Stress testing mode - disable heuristics.
523b0846e8STim Northover static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
533b0846e8STim Northover                             cl::desc("Turn all knobs to 11"));
543b0846e8STim Northover 
553b0846e8STim Northover STATISTIC(NumConsidered, "Number of ccmps considered");
563b0846e8STim Northover STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
573b0846e8STim Northover STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
583b0846e8STim Northover STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
593b0846e8STim Northover STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
603b0846e8STim Northover STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
613b0846e8STim Northover STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
623b0846e8STim Northover STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
633b0846e8STim Northover STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
643b0846e8STim Northover STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
653b0846e8STim Northover STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
663b0846e8STim Northover 
673b0846e8STim Northover STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
683b0846e8STim Northover 
693b0846e8STim Northover STATISTIC(NumConverted, "Number of ccmp instructions created");
703b0846e8STim Northover STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
713b0846e8STim Northover 
723b0846e8STim Northover //===----------------------------------------------------------------------===//
733b0846e8STim Northover //                                 SSACCmpConv
743b0846e8STim Northover //===----------------------------------------------------------------------===//
753b0846e8STim Northover //
763b0846e8STim Northover // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
773b0846e8STim Northover // after determining if it is possible. The class contains no heuristics;
783b0846e8STim Northover // external code should be used to determine when ccmp-conversion is a good
793b0846e8STim Northover // idea.
803b0846e8STim Northover //
813b0846e8STim Northover // CCmp-formation works on a CFG representing chained conditions, typically
823b0846e8STim Northover // from C's short-circuit || and && operators:
833b0846e8STim Northover //
843b0846e8STim Northover //   From:         Head            To:         Head
853b0846e8STim Northover //                 / |                         CmpBB
863b0846e8STim Northover //                /  |                         / |
873b0846e8STim Northover //               |  CmpBB                     /  |
883b0846e8STim Northover //               |  / |                    Tail  |
893b0846e8STim Northover //               | /  |                      |   |
903b0846e8STim Northover //              Tail  |                      |   |
913b0846e8STim Northover //                |   |                      |   |
923b0846e8STim Northover //               ... ...                    ... ...
933b0846e8STim Northover //
943b0846e8STim Northover // The Head block is terminated by a br.cond instruction, and the CmpBB block
953b0846e8STim Northover // contains compare + br.cond. Tail must be a successor of both.
963b0846e8STim Northover //
973b0846e8STim Northover // The cmp-conversion turns the compare instruction in CmpBB into a conditional
983b0846e8STim Northover // compare, and merges CmpBB into Head, speculatively executing its
993b0846e8STim Northover // instructions. The AArch64 conditional compare instructions have an immediate
1003b0846e8STim Northover // operand that specifies the NZCV flag values when the condition is false and
1013b0846e8STim Northover // the compare isn't executed. This makes it possible to chain compares with
1023b0846e8STim Northover // different condition codes.
1033b0846e8STim Northover //
1043b0846e8STim Northover // Example:
1053b0846e8STim Northover //
1063b0846e8STim Northover //    if (a == 5 || b == 17)
1073b0846e8STim Northover //      foo();
1083b0846e8STim Northover //
1093b0846e8STim Northover //    Head:
1103b0846e8STim Northover //       cmp  w0, #5
1113b0846e8STim Northover //       b.eq Tail
1123b0846e8STim Northover //    CmpBB:
1133b0846e8STim Northover //       cmp  w1, #17
1143b0846e8STim Northover //       b.eq Tail
1153b0846e8STim Northover //    ...
1163b0846e8STim Northover //    Tail:
1173b0846e8STim Northover //      bl _foo
1183b0846e8STim Northover //
1193b0846e8STim Northover //  Becomes:
1203b0846e8STim Northover //
1213b0846e8STim Northover //    Head:
1223b0846e8STim Northover //       cmp  w0, #5
1233b0846e8STim Northover //       ccmp w1, #17, 4, ne  ; 4 = nZcv
1243b0846e8STim Northover //       b.eq Tail
1253b0846e8STim Northover //    ...
1263b0846e8STim Northover //    Tail:
1273b0846e8STim Northover //      bl _foo
1283b0846e8STim Northover //
1293b0846e8STim Northover // The ccmp condition code is the one that would cause the Head terminator to
1303b0846e8STim Northover // branch to CmpBB.
1313b0846e8STim Northover //
1323b0846e8STim Northover // FIXME: It should also be possible to speculate a block on the critical edge
1333b0846e8STim Northover // between Head and Tail, just like if-converting a diamond.
1343b0846e8STim Northover //
1353b0846e8STim Northover // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
1363b0846e8STim Northover 
1373b0846e8STim Northover namespace {
1383b0846e8STim Northover class SSACCmpConv {
1393b0846e8STim Northover   MachineFunction *MF;
1403b0846e8STim Northover   const TargetInstrInfo *TII;
1413b0846e8STim Northover   const TargetRegisterInfo *TRI;
1423b0846e8STim Northover   MachineRegisterInfo *MRI;
1430bd79f41SMatthew Simpson   const MachineBranchProbabilityInfo *MBPI;
1443b0846e8STim Northover 
1453b0846e8STim Northover public:
1463b0846e8STim Northover   /// The first block containing a conditional branch, dominating everything
1473b0846e8STim Northover   /// else.
1483b0846e8STim Northover   MachineBasicBlock *Head;
1493b0846e8STim Northover 
1503b0846e8STim Northover   /// The block containing cmp+br.cond with a successor shared with Head.
1513b0846e8STim Northover   MachineBasicBlock *CmpBB;
1523b0846e8STim Northover 
1533b0846e8STim Northover   /// The common successor for Head and CmpBB.
1543b0846e8STim Northover   MachineBasicBlock *Tail;
1553b0846e8STim Northover 
1563b0846e8STim Northover   /// The compare instruction in CmpBB that can be converted to a ccmp.
1573b0846e8STim Northover   MachineInstr *CmpMI;
1583b0846e8STim Northover 
1593b0846e8STim Northover private:
160020041d9SKrzysztof Parzyszek   /// The branch condition in Head as determined by analyzeBranch.
1613b0846e8STim Northover   SmallVector<MachineOperand, 4> HeadCond;
1623b0846e8STim Northover 
1633b0846e8STim Northover   /// The condition code that makes Head branch to CmpBB.
1643b0846e8STim Northover   AArch64CC::CondCode HeadCmpBBCC;
1653b0846e8STim Northover 
1663b0846e8STim Northover   /// The branch condition in CmpBB.
1673b0846e8STim Northover   SmallVector<MachineOperand, 4> CmpBBCond;
1683b0846e8STim Northover 
1693b0846e8STim Northover   /// The condition code that makes CmpBB branch to Tail.
1703b0846e8STim Northover   AArch64CC::CondCode CmpBBTailCC;
1713b0846e8STim Northover 
1723b0846e8STim Northover   /// Check if the Tail PHIs are trivially convertible.
1733b0846e8STim Northover   bool trivialTailPHIs();
1743b0846e8STim Northover 
1753b0846e8STim Northover   /// Remove CmpBB from the Tail PHIs.
1763b0846e8STim Northover   void updateTailPHIs();
1773b0846e8STim Northover 
1783b0846e8STim Northover   /// Check if an operand defining DstReg is dead.
1793b0846e8STim Northover   bool isDeadDef(unsigned DstReg);
1803b0846e8STim Northover 
1813b0846e8STim Northover   /// Find the compare instruction in MBB that controls the conditional branch.
1823b0846e8STim Northover   /// Return NULL if a convertible instruction can't be found.
1833b0846e8STim Northover   MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
1843b0846e8STim Northover 
1853b0846e8STim Northover   /// Return true if all non-terminator instructions in MBB can be safely
1863b0846e8STim Northover   /// speculated.
1873b0846e8STim Northover   bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
1883b0846e8STim Northover 
1893b0846e8STim Northover public:
1903b0846e8STim Northover   /// runOnMachineFunction - Initialize per-function data structures.
runOnMachineFunction(MachineFunction & MF,const MachineBranchProbabilityInfo * MBPI)1910bd79f41SMatthew Simpson   void runOnMachineFunction(MachineFunction &MF,
1920bd79f41SMatthew Simpson                             const MachineBranchProbabilityInfo *MBPI) {
1933b0846e8STim Northover     this->MF = &MF;
1940bd79f41SMatthew Simpson     this->MBPI = MBPI;
195fc6de428SEric Christopher     TII = MF.getSubtarget().getInstrInfo();
196fc6de428SEric Christopher     TRI = MF.getSubtarget().getRegisterInfo();
1973b0846e8STim Northover     MRI = &MF.getRegInfo();
1983b0846e8STim Northover   }
1993b0846e8STim Northover 
2003b0846e8STim Northover   /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
2013b0846e8STim Northover   /// internal state, and return true.
2023b0846e8STim Northover   bool canConvert(MachineBasicBlock *MBB);
2033b0846e8STim Northover 
2043b0846e8STim Northover   /// Cmo-convert the last block passed to canConvertCmp(), assuming
2053b0846e8STim Northover   /// it is possible. Add any erased blocks to RemovedBlocks.
2063b0846e8STim Northover   void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
2073b0846e8STim Northover 
2083b0846e8STim Northover   /// Return the expected code size delta if the conversion into a
2093b0846e8STim Northover   /// conditional compare is performed.
2103b0846e8STim Northover   int expectedCodeSizeDelta() const;
2113b0846e8STim Northover };
2123b0846e8STim Northover } // end anonymous namespace
2133b0846e8STim Northover 
2143b0846e8STim Northover // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
2153b0846e8STim Northover // This means that no if-conversion is required when merging CmpBB into Head.
trivialTailPHIs()2163b0846e8STim Northover bool SSACCmpConv::trivialTailPHIs() {
2173b0846e8STim Northover   for (auto &I : *Tail) {
2183b0846e8STim Northover     if (!I.isPHI())
2193b0846e8STim Northover       break;
2203b0846e8STim Northover     unsigned HeadReg = 0, CmpBBReg = 0;
2213b0846e8STim Northover     // PHI operands come in (VReg, MBB) pairs.
2223b0846e8STim Northover     for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
2233b0846e8STim Northover       MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
2245ae66e56SDaniel Sanders       Register Reg = I.getOperand(oi).getReg();
2253b0846e8STim Northover       if (MBB == Head) {
2263b0846e8STim Northover         assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
2273b0846e8STim Northover         HeadReg = Reg;
2283b0846e8STim Northover       }
2293b0846e8STim Northover       if (MBB == CmpBB) {
2303b0846e8STim Northover         assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
2313b0846e8STim Northover         CmpBBReg = Reg;
2323b0846e8STim Northover       }
2333b0846e8STim Northover     }
2343b0846e8STim Northover     if (HeadReg != CmpBBReg)
2353b0846e8STim Northover       return false;
2363b0846e8STim Northover   }
2373b0846e8STim Northover   return true;
2383b0846e8STim Northover }
2393b0846e8STim Northover 
2403b0846e8STim Northover // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
2413b0846e8STim Northover // removing the CmpBB operands. The Head operands will be identical.
updateTailPHIs()2423b0846e8STim Northover void SSACCmpConv::updateTailPHIs() {
2433b0846e8STim Northover   for (auto &I : *Tail) {
2443b0846e8STim Northover     if (!I.isPHI())
2453b0846e8STim Northover       break;
2463b0846e8STim Northover     // I is a PHI. It can have multiple entries for CmpBB.
2473b0846e8STim Northover     for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
2483b0846e8STim Northover       // PHI operands are (Reg, MBB) at (oi-2, oi-1).
2493b0846e8STim Northover       if (I.getOperand(oi - 1).getMBB() == CmpBB) {
250*37b37838SShengchen Kan         I.removeOperand(oi - 1);
251*37b37838SShengchen Kan         I.removeOperand(oi - 2);
2523b0846e8STim Northover       }
2533b0846e8STim Northover     }
2543b0846e8STim Northover   }
2553b0846e8STim Northover }
2563b0846e8STim Northover 
2573b0846e8STim Northover // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
2583b0846e8STim Northover // are still writing virtual registers without any uses.
isDeadDef(unsigned DstReg)2593b0846e8STim Northover bool SSACCmpConv::isDeadDef(unsigned DstReg) {
2603b0846e8STim Northover   // Writes to the zero register are dead.
2613b0846e8STim Northover   if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
2623b0846e8STim Northover     return true;
2632bea69bfSDaniel Sanders   if (!Register::isVirtualRegister(DstReg))
2643b0846e8STim Northover     return false;
2653b0846e8STim Northover   // A virtual register def without any uses will be marked dead later, and
2663b0846e8STim Northover   // eventually replaced by the zero register.
2673b0846e8STim Northover   return MRI->use_nodbg_empty(DstReg);
2683b0846e8STim Northover }
2693b0846e8STim Northover 
270020041d9SKrzysztof Parzyszek // Parse a condition code returned by analyzeBranch, and compute the CondCode
2713b0846e8STim Northover // corresponding to TBB.
2723b0846e8STim Northover // Return
parseCond(ArrayRef<MachineOperand> Cond,AArch64CC::CondCode & CC)2733b0846e8STim Northover static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
2743b0846e8STim Northover   // A normal br.cond simply has the condition code.
2753b0846e8STim Northover   if (Cond[0].getImm() != -1) {
2763b0846e8STim Northover     assert(Cond.size() == 1 && "Unknown Cond array format");
2773b0846e8STim Northover     CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
2783b0846e8STim Northover     return true;
2793b0846e8STim Northover   }
2803b0846e8STim Northover   // For tbz and cbz instruction, the opcode is next.
2813b0846e8STim Northover   switch (Cond[1].getImm()) {
2823b0846e8STim Northover   default:
2833b0846e8STim Northover     // This includes tbz / tbnz branches which can't be converted to
2843b0846e8STim Northover     // ccmp + br.cond.
2853b0846e8STim Northover     return false;
2863b0846e8STim Northover   case AArch64::CBZW:
2873b0846e8STim Northover   case AArch64::CBZX:
2883b0846e8STim Northover     assert(Cond.size() == 3 && "Unknown Cond array format");
2893b0846e8STim Northover     CC = AArch64CC::EQ;
2903b0846e8STim Northover     return true;
2913b0846e8STim Northover   case AArch64::CBNZW:
2923b0846e8STim Northover   case AArch64::CBNZX:
2933b0846e8STim Northover     assert(Cond.size() == 3 && "Unknown Cond array format");
2943b0846e8STim Northover     CC = AArch64CC::NE;
2953b0846e8STim Northover     return true;
2963b0846e8STim Northover   }
2973b0846e8STim Northover }
2983b0846e8STim Northover 
findConvertibleCompare(MachineBasicBlock * MBB)2993b0846e8STim Northover MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
3003b0846e8STim Northover   MachineBasicBlock::iterator I = MBB->getFirstTerminator();
3013b0846e8STim Northover   if (I == MBB->end())
3023b0846e8STim Northover     return nullptr;
3033b0846e8STim Northover   // The terminator must be controlled by the flags.
3043b0846e8STim Northover   if (!I->readsRegister(AArch64::NZCV)) {
3053b0846e8STim Northover     switch (I->getOpcode()) {
3063b0846e8STim Northover     case AArch64::CBZW:
3073b0846e8STim Northover     case AArch64::CBZX:
3083b0846e8STim Northover     case AArch64::CBNZW:
3093b0846e8STim Northover     case AArch64::CBNZX:
3103b0846e8STim Northover       // These can be converted into a ccmp against #0.
311ab53fd9bSDuncan P. N. Exon Smith       return &*I;
3123b0846e8STim Northover     }
3133b0846e8STim Northover     ++NumCmpTermRejs;
314d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
3153b0846e8STim Northover     return nullptr;
3163b0846e8STim Northover   }
3173b0846e8STim Northover 
3183b0846e8STim Northover   // Now find the instruction controlling the terminator.
3193b0846e8STim Northover   for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
320b157974aSVedant Kumar     I = prev_nodbg(I, MBB->begin());
3213b0846e8STim Northover     assert(!I->isTerminator() && "Spurious terminator");
3223b0846e8STim Northover     switch (I->getOpcode()) {
3233b0846e8STim Northover     // cmp is an alias for subs with a dead destination register.
3243b0846e8STim Northover     case AArch64::SUBSWri:
3253b0846e8STim Northover     case AArch64::SUBSXri:
3263b0846e8STim Northover     // cmn is an alias for adds with a dead destination register.
3273b0846e8STim Northover     case AArch64::ADDSWri:
3283b0846e8STim Northover     case AArch64::ADDSXri:
3293b0846e8STim Northover       // Check that the immediate operand is within range, ccmp wants a uimm5.
3303b0846e8STim Northover       // Rd = SUBSri Rn, imm, shift
3313b0846e8STim Northover       if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
332d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
3333b0846e8STim Northover         ++NumImmRangeRejs;
3343b0846e8STim Northover         return nullptr;
3353b0846e8STim Northover       }
336cd1d5aafSJustin Bogner       LLVM_FALLTHROUGH;
3373b0846e8STim Northover     case AArch64::SUBSWrr:
3383b0846e8STim Northover     case AArch64::SUBSXrr:
3393b0846e8STim Northover     case AArch64::ADDSWrr:
3403b0846e8STim Northover     case AArch64::ADDSXrr:
3413b0846e8STim Northover       if (isDeadDef(I->getOperand(0).getReg()))
342ab53fd9bSDuncan P. N. Exon Smith         return &*I;
343d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
344d34e60caSNicola Zaghen                         << *I);
3453b0846e8STim Northover       ++NumLiveDstRejs;
3463b0846e8STim Northover       return nullptr;
3473b0846e8STim Northover     case AArch64::FCMPSrr:
3483b0846e8STim Northover     case AArch64::FCMPDrr:
3493b0846e8STim Northover     case AArch64::FCMPESrr:
3503b0846e8STim Northover     case AArch64::FCMPEDrr:
351ab53fd9bSDuncan P. N. Exon Smith       return &*I;
3523b0846e8STim Northover     }
3533b0846e8STim Northover 
3543b0846e8STim Northover     // Check for flag reads and clobbers.
3555154b025SFlorian Hahn     PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
3563b0846e8STim Northover 
35760d69e28SMatthias Braun     if (PRI.Read) {
3583b0846e8STim Northover       // The ccmp doesn't produce exactly the same flags as the original
3593b0846e8STim Northover       // compare, so reject the transform if there are uses of the flags
3603b0846e8STim Northover       // besides the terminators.
361d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
3623b0846e8STim Northover       ++NumMultNZCVUses;
3633b0846e8STim Northover       return nullptr;
3643b0846e8STim Northover     }
3653b0846e8STim Northover 
36660d69e28SMatthias Braun     if (PRI.Defined || PRI.Clobbered) {
367d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
3683b0846e8STim Northover       ++NumUnknNZCVDefs;
3693b0846e8STim Northover       return nullptr;
3703b0846e8STim Northover     }
3713b0846e8STim Northover   }
372d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
373d34e60caSNicola Zaghen                     << '\n');
3743b0846e8STim Northover   return nullptr;
3753b0846e8STim Northover }
3763b0846e8STim Northover 
3773b0846e8STim Northover /// Determine if all the instructions in MBB can safely
3783b0846e8STim Northover /// be speculated. The terminators are not considered.
3793b0846e8STim Northover ///
3803b0846e8STim Northover /// Only CmpMI is allowed to clobber the flags.
3813b0846e8STim Northover ///
canSpeculateInstrs(MachineBasicBlock * MBB,const MachineInstr * CmpMI)3823b0846e8STim Northover bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
3833b0846e8STim Northover                                      const MachineInstr *CmpMI) {
3843b0846e8STim Northover   // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
3853b0846e8STim Northover   // get right.
3863b0846e8STim Northover   if (!MBB->livein_empty()) {
387d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
3883b0846e8STim Northover     return false;
3893b0846e8STim Northover   }
3903b0846e8STim Northover 
3913b0846e8STim Northover   unsigned InstrCount = 0;
3923b0846e8STim Northover 
3933b0846e8STim Northover   // Check all instructions, except the terminators. It is assumed that
3943b0846e8STim Northover   // terminators never have side effects or define any used register values.
3953b0846e8STim Northover   for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
396801bf7ebSShiva Chen     if (I.isDebugInstr())
3973b0846e8STim Northover       continue;
3983b0846e8STim Northover 
3993b0846e8STim Northover     if (++InstrCount > BlockInstrLimit && !Stress) {
400d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
4013b0846e8STim Northover                         << BlockInstrLimit << " instructions.\n");
4023b0846e8STim Northover       return false;
4033b0846e8STim Northover     }
4043b0846e8STim Northover 
4053b0846e8STim Northover     // There shouldn't normally be any phis in a single-predecessor block.
4063b0846e8STim Northover     if (I.isPHI()) {
407d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
4083b0846e8STim Northover       return false;
4093b0846e8STim Northover     }
4103b0846e8STim Northover 
4113b0846e8STim Northover     // Don't speculate loads. Note that it may be possible and desirable to
4123b0846e8STim Northover     // speculate GOT or constant pool loads that are guaranteed not to trap,
4133b0846e8STim Northover     // but we don't support that for now.
4143b0846e8STim Northover     if (I.mayLoad()) {
415d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
4163b0846e8STim Northover       return false;
4173b0846e8STim Northover     }
4183b0846e8STim Northover 
4193b0846e8STim Northover     // We never speculate stores, so an AA pointer isn't necessary.
4203b0846e8STim Northover     bool DontMoveAcrossStore = true;
42107066ccaSMatthias Braun     if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
422d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
4233b0846e8STim Northover       return false;
4243b0846e8STim Northover     }
4253b0846e8STim Northover 
4263b0846e8STim Northover     // Only CmpMI is allowed to clobber the flags.
4273b0846e8STim Northover     if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
428d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
4293b0846e8STim Northover       return false;
4303b0846e8STim Northover     }
4313b0846e8STim Northover   }
4323b0846e8STim Northover   return true;
4333b0846e8STim Northover }
4343b0846e8STim Northover 
4353b0846e8STim Northover /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
4363b0846e8STim Northover /// candidate for cmp-conversion. Fill out the internal state.
4373b0846e8STim Northover ///
canConvert(MachineBasicBlock * MBB)4383b0846e8STim Northover bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
4393b0846e8STim Northover   Head = MBB;
4403b0846e8STim Northover   Tail = CmpBB = nullptr;
4413b0846e8STim Northover 
4423b0846e8STim Northover   if (Head->succ_size() != 2)
4433b0846e8STim Northover     return false;
4443b0846e8STim Northover   MachineBasicBlock *Succ0 = Head->succ_begin()[0];
4453b0846e8STim Northover   MachineBasicBlock *Succ1 = Head->succ_begin()[1];
4463b0846e8STim Northover 
4473b0846e8STim Northover   // CmpBB can only have a single predecessor. Tail is allowed many.
4483b0846e8STim Northover   if (Succ0->pred_size() != 1)
4493b0846e8STim Northover     std::swap(Succ0, Succ1);
4503b0846e8STim Northover 
4513b0846e8STim Northover   // Succ0 is our candidate for CmpBB.
4523b0846e8STim Northover   if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
4533b0846e8STim Northover     return false;
4543b0846e8STim Northover 
4553b0846e8STim Northover   CmpBB = Succ0;
4563b0846e8STim Northover   Tail = Succ1;
4573b0846e8STim Northover 
4583b0846e8STim Northover   if (!CmpBB->isSuccessor(Tail))
4593b0846e8STim Northover     return false;
4603b0846e8STim Northover 
4613b0846e8STim Northover   // The CFG topology checks out.
462d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
46325528d6dSFrancis Visoiu Mistrih                     << printMBBReference(*CmpBB) << " -> "
46425528d6dSFrancis Visoiu Mistrih                     << printMBBReference(*Tail) << '\n');
4653b0846e8STim Northover   ++NumConsidered;
4663b0846e8STim Northover 
4673b0846e8STim Northover   // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
4683b0846e8STim Northover   //
4693b0846e8STim Northover   // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
4703b0846e8STim Northover   // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
4713b0846e8STim Northover   // always be safe to sink the ccmp down to immediately before the CmpBB
4723b0846e8STim Northover   // terminators.
4733b0846e8STim Northover   if (!trivialTailPHIs()) {
474d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
4753b0846e8STim Northover     ++NumPhiRejs;
4763b0846e8STim Northover     return false;
4773b0846e8STim Northover   }
4783b0846e8STim Northover 
4793b0846e8STim Northover   if (!Tail->livein_empty()) {
480d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
4813b0846e8STim Northover     ++NumPhysRejs;
4823b0846e8STim Northover     return false;
4833b0846e8STim Northover   }
4843b0846e8STim Northover 
4853b0846e8STim Northover   // CmpBB should never have PHIs since Head is its only predecessor.
4863b0846e8STim Northover   // FIXME: Clean them up if it happens.
4873b0846e8STim Northover   if (!CmpBB->empty() && CmpBB->front().isPHI()) {
488d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
4893b0846e8STim Northover     ++NumPhi2Rejs;
4903b0846e8STim Northover     return false;
4913b0846e8STim Northover   }
4923b0846e8STim Northover 
4933b0846e8STim Northover   if (!CmpBB->livein_empty()) {
494d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
4953b0846e8STim Northover     ++NumPhysRejs;
4963b0846e8STim Northover     return false;
4973b0846e8STim Northover   }
4983b0846e8STim Northover 
4993b0846e8STim Northover   // The branch we're looking to eliminate must be analyzable.
5003b0846e8STim Northover   HeadCond.clear();
5013b0846e8STim Northover   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
50271c30a14SJacques Pienaar   if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
503d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
5043b0846e8STim Northover     ++NumHeadBranchRejs;
5053b0846e8STim Northover     return false;
5063b0846e8STim Northover   }
5073b0846e8STim Northover 
5083b0846e8STim Northover   // This is weird, probably some sort of degenerate CFG, or an edge to a
5093b0846e8STim Northover   // landing pad.
5103b0846e8STim Northover   if (!TBB || HeadCond.empty()) {
511d34e60caSNicola Zaghen     LLVM_DEBUG(
512020041d9SKrzysztof Parzyszek         dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
5133b0846e8STim Northover     ++NumHeadBranchRejs;
5143b0846e8STim Northover     return false;
5153b0846e8STim Northover   }
5163b0846e8STim Northover 
5173b0846e8STim Northover   if (!parseCond(HeadCond, HeadCmpBBCC)) {
518d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
5193b0846e8STim Northover     ++NumHeadBranchRejs;
5203b0846e8STim Northover     return false;
5213b0846e8STim Northover   }
5223b0846e8STim Northover 
5233b0846e8STim Northover   // Make sure the branch direction is right.
5243b0846e8STim Northover   if (TBB != CmpBB) {
5253b0846e8STim Northover     assert(TBB == Tail && "Unexpected TBB");
5263b0846e8STim Northover     HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
5273b0846e8STim Northover   }
5283b0846e8STim Northover 
5293b0846e8STim Northover   CmpBBCond.clear();
5303b0846e8STim Northover   TBB = FBB = nullptr;
53171c30a14SJacques Pienaar   if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
532d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
5333b0846e8STim Northover     ++NumCmpBranchRejs;
5343b0846e8STim Northover     return false;
5353b0846e8STim Northover   }
5363b0846e8STim Northover 
5373b0846e8STim Northover   if (!TBB || CmpBBCond.empty()) {
538d34e60caSNicola Zaghen     LLVM_DEBUG(
539020041d9SKrzysztof Parzyszek         dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
5403b0846e8STim Northover     ++NumCmpBranchRejs;
5413b0846e8STim Northover     return false;
5423b0846e8STim Northover   }
5433b0846e8STim Northover 
5443b0846e8STim Northover   if (!parseCond(CmpBBCond, CmpBBTailCC)) {
545d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
5463b0846e8STim Northover     ++NumCmpBranchRejs;
5473b0846e8STim Northover     return false;
5483b0846e8STim Northover   }
5493b0846e8STim Northover 
5503b0846e8STim Northover   if (TBB != Tail)
5513b0846e8STim Northover     CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
5523b0846e8STim Northover 
553d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Head->CmpBB on "
554d34e60caSNicola Zaghen                     << AArch64CC::getCondCodeName(HeadCmpBBCC)
555d34e60caSNicola Zaghen                     << ", CmpBB->Tail on "
556d34e60caSNicola Zaghen                     << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
5573b0846e8STim Northover 
5583b0846e8STim Northover   CmpMI = findConvertibleCompare(CmpBB);
5593b0846e8STim Northover   if (!CmpMI)
5603b0846e8STim Northover     return false;
5613b0846e8STim Northover 
5623b0846e8STim Northover   if (!canSpeculateInstrs(CmpBB, CmpMI)) {
5633b0846e8STim Northover     ++NumSpeculateRejs;
5643b0846e8STim Northover     return false;
5653b0846e8STim Northover   }
5663b0846e8STim Northover   return true;
5673b0846e8STim Northover }
5683b0846e8STim Northover 
convert(SmallVectorImpl<MachineBasicBlock * > & RemovedBlocks)5693b0846e8STim Northover void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
570d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
57125528d6dSFrancis Visoiu Mistrih                     << printMBBReference(*Head) << ":\n"
57225528d6dSFrancis Visoiu Mistrih                     << *CmpBB);
5733b0846e8STim Northover 
5743b0846e8STim Northover   // All CmpBB instructions are moved into Head, and CmpBB is deleted.
5753b0846e8STim Northover   // Update the CFG first.
5763b0846e8STim Northover   updateTailPHIs();
5770bd79f41SMatthew Simpson 
5780bd79f41SMatthew Simpson   // Save successor probabilties before removing CmpBB and Tail from their
5790bd79f41SMatthew Simpson   // parents.
5800bd79f41SMatthew Simpson   BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
5810bd79f41SMatthew Simpson   BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
5820bd79f41SMatthew Simpson 
5830bd79f41SMatthew Simpson   Head->removeSuccessor(CmpBB);
5840bd79f41SMatthew Simpson   CmpBB->removeSuccessor(Tail);
5850bd79f41SMatthew Simpson 
5860bd79f41SMatthew Simpson   // If Head and CmpBB had successor probabilties, udpate the probabilities to
5870bd79f41SMatthew Simpson   // reflect the ccmp-conversion.
5880bd79f41SMatthew Simpson   if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
5890bd79f41SMatthew Simpson 
5900bd79f41SMatthew Simpson     // Head is allowed two successors. We've removed CmpBB, so the remaining
5910bd79f41SMatthew Simpson     // successor is Tail. We need to increase the successor probability for
5920bd79f41SMatthew Simpson     // Tail to account for the CmpBB path we removed.
5930bd79f41SMatthew Simpson     //
5940bd79f41SMatthew Simpson     // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
5950bd79f41SMatthew Simpson     assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
5960bd79f41SMatthew Simpson     BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
5970bd79f41SMatthew Simpson     Head->setSuccProbability(Head->succ_begin(),
5980bd79f41SMatthew Simpson                              Head2Tail + Head2CmpBB * CmpBB2Tail);
5990bd79f41SMatthew Simpson 
6000bd79f41SMatthew Simpson     // We will transfer successors of CmpBB to Head in a moment without
6010bd79f41SMatthew Simpson     // normalizing the successor probabilities. Set the successor probabilites
6020bd79f41SMatthew Simpson     // before doing so.
6030bd79f41SMatthew Simpson     //
6040bd79f41SMatthew Simpson     // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
6050bd79f41SMatthew Simpson     for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
6060bd79f41SMatthew Simpson       BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
6070bd79f41SMatthew Simpson       CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
6080bd79f41SMatthew Simpson     }
6090bd79f41SMatthew Simpson   }
6100bd79f41SMatthew Simpson 
6113b0846e8STim Northover   Head->transferSuccessorsAndUpdatePHIs(CmpBB);
6123b0846e8STim Northover   DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
6131b9fc8edSMatt Arsenault   TII->removeBranch(*Head);
6143b0846e8STim Northover 
6153b0846e8STim Northover   // If the Head terminator was one of the cbz / tbz branches with built-in
6163b0846e8STim Northover   // compare, we need to insert an explicit compare instruction in its place.
6173b0846e8STim Northover   if (HeadCond[0].getImm() == -1) {
6183b0846e8STim Northover     ++NumCompBranches;
6193b0846e8STim Northover     unsigned Opc = 0;
6203b0846e8STim Northover     switch (HeadCond[1].getImm()) {
6213b0846e8STim Northover     case AArch64::CBZW:
6223b0846e8STim Northover     case AArch64::CBNZW:
6233b0846e8STim Northover       Opc = AArch64::SUBSWri;
6243b0846e8STim Northover       break;
6253b0846e8STim Northover     case AArch64::CBZX:
6263b0846e8STim Northover     case AArch64::CBNZX:
6273b0846e8STim Northover       Opc = AArch64::SUBSXri;
6283b0846e8STim Northover       break;
6293b0846e8STim Northover     default:
6303b0846e8STim Northover       llvm_unreachable("Cannot convert Head branch");
6313b0846e8STim Northover     }
6323b0846e8STim Northover     const MCInstrDesc &MCID = TII->get(Opc);
6333b0846e8STim Northover     // Create a dummy virtual register for the SUBS def.
6345ae66e56SDaniel Sanders     Register DestReg =
6353b0846e8STim Northover         MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
6363b0846e8STim Northover     // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
6373b0846e8STim Northover     BuildMI(*Head, Head->end(), TermDL, MCID)
6383b0846e8STim Northover         .addReg(DestReg, RegState::Define | RegState::Dead)
639116bbab4SDiana Picus         .add(HeadCond[2])
6403b0846e8STim Northover         .addImm(0)
6413b0846e8STim Northover         .addImm(0);
6423b0846e8STim Northover     // SUBS uses the GPR*sp register classes.
6433b0846e8STim Northover     MRI->constrainRegClass(HeadCond[2].getReg(),
6443b0846e8STim Northover                            TII->getRegClass(MCID, 1, TRI, *MF));
6453b0846e8STim Northover   }
6463b0846e8STim Northover 
6473b0846e8STim Northover   Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
6483b0846e8STim Northover 
6493b0846e8STim Northover   // Now replace CmpMI with a ccmp instruction that also considers the incoming
6503b0846e8STim Northover   // flags.
6513b0846e8STim Northover   unsigned Opc = 0;
6523b0846e8STim Northover   unsigned FirstOp = 1;   // First CmpMI operand to copy.
6533b0846e8STim Northover   bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
6543b0846e8STim Northover   switch (CmpMI->getOpcode()) {
6553b0846e8STim Northover   default:
6563b0846e8STim Northover     llvm_unreachable("Unknown compare opcode");
6573b0846e8STim Northover   case AArch64::SUBSWri:    Opc = AArch64::CCMPWi; break;
6583b0846e8STim Northover   case AArch64::SUBSWrr:    Opc = AArch64::CCMPWr; break;
6593b0846e8STim Northover   case AArch64::SUBSXri:    Opc = AArch64::CCMPXi; break;
6603b0846e8STim Northover   case AArch64::SUBSXrr:    Opc = AArch64::CCMPXr; break;
6613b0846e8STim Northover   case AArch64::ADDSWri:    Opc = AArch64::CCMNWi; break;
6623b0846e8STim Northover   case AArch64::ADDSWrr:    Opc = AArch64::CCMNWr; break;
6633b0846e8STim Northover   case AArch64::ADDSXri:    Opc = AArch64::CCMNXi; break;
6643b0846e8STim Northover   case AArch64::ADDSXrr:    Opc = AArch64::CCMNXr; break;
6653b0846e8STim Northover   case AArch64::FCMPSrr:    Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
6663b0846e8STim Northover   case AArch64::FCMPDrr:    Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
6673b0846e8STim Northover   case AArch64::FCMPESrr:   Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
6683b0846e8STim Northover   case AArch64::FCMPEDrr:   Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
6693b0846e8STim Northover   case AArch64::CBZW:
6703b0846e8STim Northover   case AArch64::CBNZW:
6713b0846e8STim Northover     Opc = AArch64::CCMPWi;
6723b0846e8STim Northover     FirstOp = 0;
6733b0846e8STim Northover     isZBranch = true;
6743b0846e8STim Northover     break;
6753b0846e8STim Northover   case AArch64::CBZX:
6763b0846e8STim Northover   case AArch64::CBNZX:
6773b0846e8STim Northover     Opc = AArch64::CCMPXi;
6783b0846e8STim Northover     FirstOp = 0;
6793b0846e8STim Northover     isZBranch = true;
6803b0846e8STim Northover     break;
6813b0846e8STim Northover   }
6823b0846e8STim Northover 
6833b0846e8STim Northover   // The ccmp instruction should set the flags according to the comparison when
6843b0846e8STim Northover   // Head would have branched to CmpBB.
6853b0846e8STim Northover   // The NZCV immediate operand should provide flags for the case where Head
6863b0846e8STim Northover   // would have branched to Tail. These flags should cause the new Head
6873b0846e8STim Northover   // terminator to branch to tail.
6883b0846e8STim Northover   unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
6893b0846e8STim Northover   const MCInstrDesc &MCID = TII->get(Opc);
6903b0846e8STim Northover   MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
6913b0846e8STim Northover                          TII->getRegClass(MCID, 0, TRI, *MF));
6923b0846e8STim Northover   if (CmpMI->getOperand(FirstOp + 1).isReg())
6933b0846e8STim Northover     MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
6943b0846e8STim Northover                            TII->getRegClass(MCID, 1, TRI, *MF));
695116bbab4SDiana Picus   MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
696116bbab4SDiana Picus                                 .add(CmpMI->getOperand(FirstOp)); // Register Rn
6973b0846e8STim Northover   if (isZBranch)
6983b0846e8STim Northover     MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
6993b0846e8STim Northover   else
700116bbab4SDiana Picus     MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
7013b0846e8STim Northover   MIB.addImm(NZCV).addImm(HeadCmpBBCC);
7023b0846e8STim Northover 
7033b0846e8STim Northover   // If CmpMI was a terminator, we need a new conditional branch to replace it.
7043b0846e8STim Northover   // This now becomes a Head terminator.
7053b0846e8STim Northover   if (isZBranch) {
7063b0846e8STim Northover     bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
7073b0846e8STim Northover                 CmpMI->getOpcode() == AArch64::CBNZX;
7083b0846e8STim Northover     BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
7093b0846e8STim Northover         .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
710116bbab4SDiana Picus         .add(CmpMI->getOperand(1)); // Branch target.
7113b0846e8STim Northover   }
7123b0846e8STim Northover   CmpMI->eraseFromParent();
7131978309dSJames Y Knight   Head->updateTerminator(CmpBB->getNextNode());
7143b0846e8STim Northover 
7153b0846e8STim Northover   RemovedBlocks.push_back(CmpBB);
7163b0846e8STim Northover   CmpBB->eraseFromParent();
717d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
7183b0846e8STim Northover   ++NumConverted;
7193b0846e8STim Northover }
7203b0846e8STim Northover 
expectedCodeSizeDelta() const7213b0846e8STim Northover int SSACCmpConv::expectedCodeSizeDelta() const {
7223b0846e8STim Northover   int delta = 0;
7233b0846e8STim Northover   // If the Head terminator was one of the cbz / tbz branches with built-in
7243b0846e8STim Northover   // compare, we need to insert an explicit compare instruction in its place
7253b0846e8STim Northover   // plus a branch instruction.
7263b0846e8STim Northover   if (HeadCond[0].getImm() == -1) {
7273b0846e8STim Northover     switch (HeadCond[1].getImm()) {
7283b0846e8STim Northover     case AArch64::CBZW:
7293b0846e8STim Northover     case AArch64::CBNZW:
7303b0846e8STim Northover     case AArch64::CBZX:
7313b0846e8STim Northover     case AArch64::CBNZX:
7323b0846e8STim Northover       // Therefore delta += 1
7333b0846e8STim Northover       delta = 1;
7343b0846e8STim Northover       break;
7353b0846e8STim Northover     default:
7363b0846e8STim Northover       llvm_unreachable("Cannot convert Head branch");
7373b0846e8STim Northover     }
7383b0846e8STim Northover   }
7393b0846e8STim Northover   // If the Cmp terminator was one of the cbz / tbz branches with
7403b0846e8STim Northover   // built-in compare, it will be turned into a compare instruction
7413b0846e8STim Northover   // into Head, but we do not save any instruction.
7423b0846e8STim Northover   // Otherwise, we save the branch instruction.
7433b0846e8STim Northover   switch (CmpMI->getOpcode()) {
7443b0846e8STim Northover   default:
7453b0846e8STim Northover     --delta;
7463b0846e8STim Northover     break;
7473b0846e8STim Northover   case AArch64::CBZW:
7483b0846e8STim Northover   case AArch64::CBNZW:
7493b0846e8STim Northover   case AArch64::CBZX:
7503b0846e8STim Northover   case AArch64::CBNZX:
7513b0846e8STim Northover     break;
7523b0846e8STim Northover   }
7533b0846e8STim Northover   return delta;
7543b0846e8STim Northover }
7553b0846e8STim Northover 
7563b0846e8STim Northover //===----------------------------------------------------------------------===//
7573b0846e8STim Northover //                       AArch64ConditionalCompares Pass
7583b0846e8STim Northover //===----------------------------------------------------------------------===//
7593b0846e8STim Northover 
7603b0846e8STim Northover namespace {
7613b0846e8STim Northover class AArch64ConditionalCompares : public MachineFunctionPass {
7620bd79f41SMatthew Simpson   const MachineBranchProbabilityInfo *MBPI;
7633b0846e8STim Northover   const TargetInstrInfo *TII;
7643b0846e8STim Northover   const TargetRegisterInfo *TRI;
76511759457SPete Cooper   MCSchedModel SchedModel;
7663b0846e8STim Northover   // Does the proceeded function has Oz attribute.
7673b0846e8STim Northover   bool MinSize;
7683b0846e8STim Northover   MachineRegisterInfo *MRI;
7693b0846e8STim Northover   MachineDominatorTree *DomTree;
7703b0846e8STim Northover   MachineLoopInfo *Loops;
7713b0846e8STim Northover   MachineTraceMetrics *Traces;
7723b0846e8STim Northover   MachineTraceMetrics::Ensemble *MinInstr;
7733b0846e8STim Northover   SSACCmpConv CmpConv;
7743b0846e8STim Northover 
7753b0846e8STim Northover public:
7763b0846e8STim Northover   static char ID;
AArch64ConditionalCompares()777850043b2SDiana Picus   AArch64ConditionalCompares() : MachineFunctionPass(ID) {
778850043b2SDiana Picus     initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
779850043b2SDiana Picus   }
7803b0846e8STim Northover   void getAnalysisUsage(AnalysisUsage &AU) const override;
7813b0846e8STim Northover   bool runOnMachineFunction(MachineFunction &MF) override;
getPassName() const782117296c0SMehdi Amini   StringRef getPassName() const override {
7833b0846e8STim Northover     return "AArch64 Conditional Compares";
7843b0846e8STim Northover   }
7853b0846e8STim Northover 
7863b0846e8STim Northover private:
7873b0846e8STim Northover   bool tryConvert(MachineBasicBlock *);
7883b0846e8STim Northover   void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
7893b0846e8STim Northover   void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
7903b0846e8STim Northover   void invalidateTraces();
7913b0846e8STim Northover   bool shouldConvert();
7923b0846e8STim Northover };
7933b0846e8STim Northover } // end anonymous namespace
7943b0846e8STim Northover 
7953b0846e8STim Northover char AArch64ConditionalCompares::ID = 0;
7963b0846e8STim Northover 
7973b0846e8STim Northover INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
7983b0846e8STim Northover                       "AArch64 CCMP Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)7990bd79f41SMatthew Simpson INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
8003b0846e8STim Northover INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
8013b0846e8STim Northover INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
8023b0846e8STim Northover INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
8033b0846e8STim Northover                     "AArch64 CCMP Pass", false, false)
8043b0846e8STim Northover 
8053b0846e8STim Northover FunctionPass *llvm::createAArch64ConditionalCompares() {
8063b0846e8STim Northover   return new AArch64ConditionalCompares();
8073b0846e8STim Northover }
8083b0846e8STim Northover 
getAnalysisUsage(AnalysisUsage & AU) const8093b0846e8STim Northover void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
8100bd79f41SMatthew Simpson   AU.addRequired<MachineBranchProbabilityInfo>();
8113b0846e8STim Northover   AU.addRequired<MachineDominatorTree>();
8123b0846e8STim Northover   AU.addPreserved<MachineDominatorTree>();
8133b0846e8STim Northover   AU.addRequired<MachineLoopInfo>();
8143b0846e8STim Northover   AU.addPreserved<MachineLoopInfo>();
8153b0846e8STim Northover   AU.addRequired<MachineTraceMetrics>();
8163b0846e8STim Northover   AU.addPreserved<MachineTraceMetrics>();
8173b0846e8STim Northover   MachineFunctionPass::getAnalysisUsage(AU);
8183b0846e8STim Northover }
8193b0846e8STim Northover 
8203b0846e8STim Northover /// Update the dominator tree after if-conversion erased some blocks.
updateDomTree(ArrayRef<MachineBasicBlock * > Removed)8213b0846e8STim Northover void AArch64ConditionalCompares::updateDomTree(
8223b0846e8STim Northover     ArrayRef<MachineBasicBlock *> Removed) {
8233b0846e8STim Northover   // convert() removes CmpBB which was previously dominated by Head.
8243b0846e8STim Northover   // CmpBB children should be transferred to Head.
8253b0846e8STim Northover   MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
8267be8f8f0SPete Cooper   for (MachineBasicBlock *RemovedMBB : Removed) {
8277be8f8f0SPete Cooper     MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
8283b0846e8STim Northover     assert(Node != HeadNode && "Cannot erase the head node");
8293b0846e8STim Northover     assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
8303b0846e8STim Northover     while (Node->getNumChildren())
83176c5cb05SNicolai Hähnle       DomTree->changeImmediateDominator(Node->back(), HeadNode);
8327be8f8f0SPete Cooper     DomTree->eraseNode(RemovedMBB);
8333b0846e8STim Northover   }
8343b0846e8STim Northover }
8353b0846e8STim Northover 
8363b0846e8STim Northover /// Update LoopInfo after if-conversion.
8373b0846e8STim Northover void
updateLoops(ArrayRef<MachineBasicBlock * > Removed)8383b0846e8STim Northover AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
8393b0846e8STim Northover   if (!Loops)
8403b0846e8STim Northover     return;
8417be8f8f0SPete Cooper   for (MachineBasicBlock *RemovedMBB : Removed)
8427be8f8f0SPete Cooper     Loops->removeBlock(RemovedMBB);
8433b0846e8STim Northover }
8443b0846e8STim Northover 
8453b0846e8STim Northover /// Invalidate MachineTraceMetrics before if-conversion.
invalidateTraces()8463b0846e8STim Northover void AArch64ConditionalCompares::invalidateTraces() {
8473b0846e8STim Northover   Traces->invalidate(CmpConv.Head);
8483b0846e8STim Northover   Traces->invalidate(CmpConv.CmpBB);
8493b0846e8STim Northover }
8503b0846e8STim Northover 
8513b0846e8STim Northover /// Apply cost model and heuristics to the if-conversion in IfConv.
8523b0846e8STim Northover /// Return true if the conversion is a good idea.
8533b0846e8STim Northover ///
shouldConvert()8543b0846e8STim Northover bool AArch64ConditionalCompares::shouldConvert() {
8553b0846e8STim Northover   // Stress testing mode disables all cost considerations.
8563b0846e8STim Northover   if (Stress)
8573b0846e8STim Northover     return true;
8583b0846e8STim Northover   if (!MinInstr)
8593b0846e8STim Northover     MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
8603b0846e8STim Northover 
8613b0846e8STim Northover   // Head dominates CmpBB, so it is always included in its trace.
8623b0846e8STim Northover   MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
8633b0846e8STim Northover 
8643b0846e8STim Northover   // If code size is the main concern
8653b0846e8STim Northover   if (MinSize) {
8663b0846e8STim Northover     int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
867d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Code size delta:  " << CodeSizeDelta << '\n');
8683b0846e8STim Northover     // If we are minimizing the code size, do the conversion whatever
8693b0846e8STim Northover     // the cost is.
8703b0846e8STim Northover     if (CodeSizeDelta < 0)
8713b0846e8STim Northover       return true;
8723b0846e8STim Northover     if (CodeSizeDelta > 0) {
873d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
8743b0846e8STim Northover       return false;
8753b0846e8STim Northover     }
8763b0846e8STim Northover     // CodeSizeDelta == 0, continue with the regular heuristics
8773b0846e8STim Northover   }
8783b0846e8STim Northover 
8793b0846e8STim Northover   // Heuristic: The compare conversion delays the execution of the branch
8803b0846e8STim Northover   // instruction because we must wait for the inputs to the second compare as
8813b0846e8STim Northover   // well. The branch has no dependent instructions, but delaying it increases
8823b0846e8STim Northover   // the cost of a misprediction.
8833b0846e8STim Northover   //
8843b0846e8STim Northover   // Set a limit on the delay we will accept.
88511759457SPete Cooper   unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
8863b0846e8STim Northover 
8873b0846e8STim Northover   // Instruction depths can be computed for all trace instructions above CmpBB.
8883b0846e8STim Northover   unsigned HeadDepth =
889e59c8af7SDuncan P. N. Exon Smith       Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
8903b0846e8STim Northover   unsigned CmpBBDepth =
891e59c8af7SDuncan P. N. Exon Smith       Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
892d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Head depth:  " << HeadDepth
8933b0846e8STim Northover                     << "\nCmpBB depth: " << CmpBBDepth << '\n');
8943b0846e8STim Northover   if (CmpBBDepth > HeadDepth + DelayLimit) {
895d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
8963b0846e8STim Northover                       << " cycles.\n");
8973b0846e8STim Northover     return false;
8983b0846e8STim Northover   }
8993b0846e8STim Northover 
9003b0846e8STim Northover   // Check the resource depth at the bottom of CmpBB - these instructions will
9013b0846e8STim Northover   // be speculated.
9023b0846e8STim Northover   unsigned ResDepth = Trace.getResourceDepth(true);
903d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "Resources:   " << ResDepth << '\n');
9043b0846e8STim Northover 
9053b0846e8STim Northover   // Heuristic: The speculatively executed instructions must all be able to
9063b0846e8STim Northover   // merge into the Head block. The Head critical path should dominate the
9073b0846e8STim Northover   // resource cost of the speculated instructions.
9083b0846e8STim Northover   if (ResDepth > HeadDepth) {
909d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
9103b0846e8STim Northover     return false;
9113b0846e8STim Northover   }
9123b0846e8STim Northover   return true;
9133b0846e8STim Northover }
9143b0846e8STim Northover 
tryConvert(MachineBasicBlock * MBB)9153b0846e8STim Northover bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
9163b0846e8STim Northover   bool Changed = false;
9173b0846e8STim Northover   while (CmpConv.canConvert(MBB) && shouldConvert()) {
9183b0846e8STim Northover     invalidateTraces();
9193b0846e8STim Northover     SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
9203b0846e8STim Northover     CmpConv.convert(RemovedBlocks);
9213b0846e8STim Northover     Changed = true;
9223b0846e8STim Northover     updateDomTree(RemovedBlocks);
9233b0846e8STim Northover     updateLoops(RemovedBlocks);
9243b0846e8STim Northover   }
9253b0846e8STim Northover   return Changed;
9263b0846e8STim Northover }
9273b0846e8STim Northover 
runOnMachineFunction(MachineFunction & MF)9283b0846e8STim Northover bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
929d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
9303b0846e8STim Northover                     << "********** Function: " << MF.getName() << '\n');
931f1caa283SMatthias Braun   if (skipFunction(MF.getFunction()))
9321ac98bb0SAndrew Kaylor     return false;
9331ac98bb0SAndrew Kaylor 
934fc6de428SEric Christopher   TII = MF.getSubtarget().getInstrInfo();
935fc6de428SEric Christopher   TRI = MF.getSubtarget().getRegisterInfo();
9363d4276f0SEric Christopher   SchedModel = MF.getSubtarget().getSchedModel();
9373b0846e8STim Northover   MRI = &MF.getRegInfo();
9383b0846e8STim Northover   DomTree = &getAnalysis<MachineDominatorTree>();
9393b0846e8STim Northover   Loops = getAnalysisIfAvailable<MachineLoopInfo>();
9400bd79f41SMatthew Simpson   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
9413b0846e8STim Northover   Traces = &getAnalysis<MachineTraceMetrics>();
9423b0846e8STim Northover   MinInstr = nullptr;
94385bd3978SEvandro Menezes   MinSize = MF.getFunction().hasMinSize();
9443b0846e8STim Northover 
9453b0846e8STim Northover   bool Changed = false;
9460bd79f41SMatthew Simpson   CmpConv.runOnMachineFunction(MF, MBPI);
9473b0846e8STim Northover 
9483b0846e8STim Northover   // Visit blocks in dominator tree pre-order. The pre-order enables multiple
9493b0846e8STim Northover   // cmp-conversions from the same head block.
9503b0846e8STim Northover   // Note that updateDomTree() modifies the children of the DomTree node
9513b0846e8STim Northover   // currently being visited. The df_iterator supports that; it doesn't look at
9523b0846e8STim Northover   // child_begin() / child_end() until after a node has been visited.
9533b0846e8STim Northover   for (auto *I : depth_first(DomTree))
9543b0846e8STim Northover     if (tryConvert(I->getBlock()))
9553b0846e8STim Northover       Changed = true;
9563b0846e8STim Northover 
9573b0846e8STim Northover   return Changed;
9583b0846e8STim Northover }
959