14c4d2fe2STim Corringham //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
24c4d2fe2STim Corringham //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64c4d2fe2STim Corringham //
74c4d2fe2STim Corringham //===----------------------------------------------------------------------===//
84c4d2fe2STim Corringham /// \file
94c4d2fe2STim Corringham /// This pass inserts changes to the Mode register settings as required.
104c4d2fe2STim Corringham /// Note that currently it only deals with the Double Precision Floating Point
114c4d2fe2STim Corringham /// rounding mode setting, but is intended to be generic enough to be easily
124c4d2fe2STim Corringham /// expanded.
134c4d2fe2STim Corringham ///
144c4d2fe2STim Corringham //===----------------------------------------------------------------------===//
154c4d2fe2STim Corringham //
164c4d2fe2STim Corringham #include "AMDGPU.h"
17560d7e04Sdfukalov #include "GCNSubtarget.h"
18560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
194c4d2fe2STim Corringham #include "llvm/ADT/Statistic.h"
204c4d2fe2STim Corringham #include <queue>
214c4d2fe2STim Corringham 
224c4d2fe2STim Corringham #define DEBUG_TYPE "si-mode-register"
234c4d2fe2STim Corringham 
244c4d2fe2STim Corringham STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
254c4d2fe2STim Corringham 
264c4d2fe2STim Corringham using namespace llvm;
274c4d2fe2STim Corringham 
// A partial description of the Mode register: Mask selects the bits whose
// value is known (or required) and Mode holds the value of those bits.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value.
  unsigned Mask = 0;
  // Value of the bits selected by Mask. The constructors leave every bit
  // outside Mask cleared.
  unsigned Mode = 0;

  Status() = default;

  // Build a status from a mask and a mode; mode bits outside the mask are
  // discarded so that equal knowledge always compares equal.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // Merge S on top of this status: bits known to S take S's value, all other
  // known bits keep their current value.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: only bits known on both sides with the same value remain.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' wherever the two modes agree.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change the Mode to the required Mode: the
  // bits S needs that either differ from, or are unknown in, this status.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True when every bit selected by S.Mask is known here and already has the
  // value S asks for.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True when S can be folded into this status without a conflict: either the
  // two masks do not overlap, or S is already satisfied.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
774c4d2fe2STim Corringham 
784c4d2fe2STim Corringham class BlockData {
794c4d2fe2STim Corringham public:
804c4d2fe2STim Corringham   // The Status that represents the mode register settings required by the
814c4d2fe2STim Corringham   // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
824c4d2fe2STim Corringham   Status Require;
834c4d2fe2STim Corringham 
844c4d2fe2STim Corringham   // The Status that represents the net changes to the Mode register made by
854c4d2fe2STim Corringham   // this block, Calculated in Phase 1.
864c4d2fe2STim Corringham   Status Change;
874c4d2fe2STim Corringham 
884c4d2fe2STim Corringham   // The Status that represents the mode register settings on exit from this
894c4d2fe2STim Corringham   // block. Calculated in Phase 2.
904c4d2fe2STim Corringham   Status Exit;
914c4d2fe2STim Corringham 
924c4d2fe2STim Corringham   // The Status that represents the intersection of exit Mode register settings
934c4d2fe2STim Corringham   // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
944c4d2fe2STim Corringham   Status Pred;
954c4d2fe2STim Corringham 
964c4d2fe2STim Corringham   // In Phase 1 we record the first instruction that has a mode requirement,
974c4d2fe2STim Corringham   // which is used in Phase 3 if we need to insert a mode change.
984c4d2fe2STim Corringham   MachineInstr *FirstInsertionPoint;
994c4d2fe2STim Corringham 
100c3b3b999STim Corringham   // A flag to indicate whether an Exit value has been set (we can't tell by
101c3b3b999STim Corringham   // examining the Exit value itself as all values may be valid results).
102c3b3b999STim Corringham   bool ExitSet;
103c3b3b999STim Corringham 
104c3b3b999STim Corringham   BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
1054c4d2fe2STim Corringham };
1064c4d2fe2STim Corringham 
1074c4d2fe2STim Corringham namespace {
1084c4d2fe2STim Corringham 
1094c4d2fe2STim Corringham class SIModeRegister : public MachineFunctionPass {
1104c4d2fe2STim Corringham public:
1114c4d2fe2STim Corringham   static char ID;
1124c4d2fe2STim Corringham 
1134c4d2fe2STim Corringham   std::vector<std::unique_ptr<BlockData>> BlockInfo;
1144c4d2fe2STim Corringham   std::queue<MachineBasicBlock *> Phase2List;
1154c4d2fe2STim Corringham 
1164c4d2fe2STim Corringham   // The default mode register setting currently only caters for the floating
1174c4d2fe2STim Corringham   // point double precision rounding mode.
1184c4d2fe2STim Corringham   // We currently assume the default rounding mode is Round to Nearest
1194c4d2fe2STim Corringham   // NOTE: this should come from a per function rounding mode setting once such
1204c4d2fe2STim Corringham   // a setting exists.
1214c4d2fe2STim Corringham   unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
1224c4d2fe2STim Corringham   Status DefaultStatus =
1234c4d2fe2STim Corringham       Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
1244c4d2fe2STim Corringham 
125c3b3b999STim Corringham   bool Changed = false;
126c3b3b999STim Corringham 
1274c4d2fe2STim Corringham public:
1284c4d2fe2STim Corringham   SIModeRegister() : MachineFunctionPass(ID) {}
1294c4d2fe2STim Corringham 
1304c4d2fe2STim Corringham   bool runOnMachineFunction(MachineFunction &MF) override;
1314c4d2fe2STim Corringham 
1324c4d2fe2STim Corringham   void getAnalysisUsage(AnalysisUsage &AU) const override {
1334c4d2fe2STim Corringham     AU.setPreservesCFG();
1344c4d2fe2STim Corringham     MachineFunctionPass::getAnalysisUsage(AU);
1354c4d2fe2STim Corringham   }
1364c4d2fe2STim Corringham 
1374c4d2fe2STim Corringham   void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1384c4d2fe2STim Corringham 
1394c4d2fe2STim Corringham   void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1404c4d2fe2STim Corringham 
1414c4d2fe2STim Corringham   void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1424c4d2fe2STim Corringham 
1434c4d2fe2STim Corringham   Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
1444c4d2fe2STim Corringham 
1454c4d2fe2STim Corringham   void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
1464c4d2fe2STim Corringham                     const SIInstrInfo *TII, Status InstrMode);
1474c4d2fe2STim Corringham };
1484c4d2fe2STim Corringham } // End anonymous namespace.
1494c4d2fe2STim Corringham 
1504c4d2fe2STim Corringham INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
1514c4d2fe2STim Corringham                 "Insert required mode register values", false, false)
1524c4d2fe2STim Corringham 
1534c4d2fe2STim Corringham char SIModeRegister::ID = 0;
1544c4d2fe2STim Corringham 
1554c4d2fe2STim Corringham char &llvm::SIModeRegisterID = SIModeRegister::ID;
1564c4d2fe2STim Corringham 
1574c4d2fe2STim Corringham FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
1584c4d2fe2STim Corringham 
1594c4d2fe2STim Corringham // Determine the Mode register setting required for this instruction.
1604c4d2fe2STim Corringham // Instructions which don't use the Mode register return a null Status.
1614c4d2fe2STim Corringham // Note this currently only deals with instructions that use the floating point
1624c4d2fe2STim Corringham // double precision setting.
1634c4d2fe2STim Corringham Status SIModeRegister::getInstructionMode(MachineInstr &MI,
1644c4d2fe2STim Corringham                                           const SIInstrInfo *TII) {
165*dcb2da13SJulien Pages   if (TII->usesFPDPRounding(MI) ||
166*dcb2da13SJulien Pages       MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
167*dcb2da13SJulien Pages       MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
1684c4d2fe2STim Corringham     switch (MI.getOpcode()) {
1694c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P1LL_F16:
1704c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P1LV_F16:
1714c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P2_F16:
1724c4d2fe2STim Corringham       // f16 interpolation instructions need double precision round to zero
1734c4d2fe2STim Corringham       return Status(FP_ROUND_MODE_DP(3),
1744c4d2fe2STim Corringham                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
175*dcb2da13SJulien Pages     case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
176*dcb2da13SJulien Pages       // Replacing the pseudo by a real instruction
177*dcb2da13SJulien Pages       MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
178*dcb2da13SJulien Pages       return Status(FP_ROUND_MODE_DP(3),
179*dcb2da13SJulien Pages                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
180*dcb2da13SJulien Pages     }
181*dcb2da13SJulien Pages     case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
182*dcb2da13SJulien Pages       // Replacing the pseudo by a real instruction
183*dcb2da13SJulien Pages       MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
184*dcb2da13SJulien Pages       return Status(FP_ROUND_MODE_DP(3),
185*dcb2da13SJulien Pages                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
186*dcb2da13SJulien Pages     }
1874c4d2fe2STim Corringham     default:
1884c4d2fe2STim Corringham       return DefaultStatus;
1894c4d2fe2STim Corringham     }
1904c4d2fe2STim Corringham   }
1914c4d2fe2STim Corringham   return Status();
1924c4d2fe2STim Corringham }
1934c4d2fe2STim Corringham 
1944c4d2fe2STim Corringham // Insert a setreg instruction to update the Mode register.
1954c4d2fe2STim Corringham // It is possible (though unlikely) for an instruction to require a change to
1964c4d2fe2STim Corringham // the value of disjoint parts of the Mode register when we don't know the
1974c4d2fe2STim Corringham // value of the intervening bits. In that case we need to use more than one
1984c4d2fe2STim Corringham // setreg instruction.
1994c4d2fe2STim Corringham void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
2004c4d2fe2STim Corringham                                   const SIInstrInfo *TII, Status InstrMode) {
2014c4d2fe2STim Corringham   while (InstrMode.Mask) {
2024c4d2fe2STim Corringham     unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
2034c4d2fe2STim Corringham     unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
2044c4d2fe2STim Corringham     unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
2055a667c0eSKazu Hirata     BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
2064c4d2fe2STim Corringham         .addImm(Value)
2074c4d2fe2STim Corringham         .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
2084c4d2fe2STim Corringham                 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
2094c4d2fe2STim Corringham                 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
2104c4d2fe2STim Corringham     ++NumSetregInserted;
211c3b3b999STim Corringham     Changed = true;
2122faadb15STim Corringham     InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
2134c4d2fe2STim Corringham   }
2144c4d2fe2STim Corringham }
2154c4d2fe2STim Corringham 
2164c4d2fe2STim Corringham // In Phase 1 we iterate through the instructions of the block and for each
2174c4d2fe2STim Corringham // instruction we get its mode usage. If the instruction uses the Mode register
2184c4d2fe2STim Corringham // we:
2194c4d2fe2STim Corringham // - update the Change status, which tracks the changes to the Mode register
2204c4d2fe2STim Corringham //   made by this block
2214c4d2fe2STim Corringham // - if this instruction's requirements are compatible with the current setting
2224c4d2fe2STim Corringham //   of the Mode register we merge the modes
2234c4d2fe2STim Corringham // - if it isn't compatible and an InsertionPoint isn't set, then we set the
2244c4d2fe2STim Corringham //   InsertionPoint to the current instruction, and we remember the current
2254c4d2fe2STim Corringham //   mode
2264c4d2fe2STim Corringham // - if it isn't compatible and InsertionPoint is set we insert a seteg before
2274c4d2fe2STim Corringham //   that instruction (unless this instruction forms part of the block's
2284c4d2fe2STim Corringham //   entry requirements in which case the insertion is deferred until Phase 3
2294c4d2fe2STim Corringham //   when predecessor exit values are known), and move the insertion point to
2304c4d2fe2STim Corringham //   this instruction
2314c4d2fe2STim Corringham // - if this is a setreg instruction we treat it as an incompatible instruction.
2324c4d2fe2STim Corringham //   This is sub-optimal but avoids some nasty corner cases, and is expected to
2334c4d2fe2STim Corringham //   occur very rarely.
2344c4d2fe2STim Corringham // - on exit we have set the Require, Change, and initial Exit modes.
2354c4d2fe2STim Corringham void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
2364c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
2370eaee545SJonas Devlieghere   auto NewInfo = std::make_unique<BlockData>();
2384c4d2fe2STim Corringham   MachineInstr *InsertionPoint = nullptr;
2394c4d2fe2STim Corringham   // RequirePending is used to indicate whether we are collecting the initial
2404c4d2fe2STim Corringham   // requirements for the block, and need to defer the first InsertionPoint to
2414c4d2fe2STim Corringham   // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
242d1f45ed5SNeubauer, Sebastian   // we discover an explicit setreg that means this block doesn't have any
2434c4d2fe2STim Corringham   // initial requirements.
2444c4d2fe2STim Corringham   bool RequirePending = true;
2454c4d2fe2STim Corringham   Status IPChange;
2464c4d2fe2STim Corringham   for (MachineInstr &MI : MBB) {
2474c4d2fe2STim Corringham     Status InstrMode = getInstructionMode(MI, TII);
24890777e29SJay Foad     if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
24990777e29SJay Foad         MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
25090777e29SJay Foad         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
25190777e29SJay Foad         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
2524c4d2fe2STim Corringham       // We preserve any explicit mode register setreg instruction we encounter,
2534c4d2fe2STim Corringham       // as we assume it has been inserted by a higher authority (this is
2544c4d2fe2STim Corringham       // likely to be a very rare occurrence).
2554c4d2fe2STim Corringham       unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2564c4d2fe2STim Corringham       if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
2574c4d2fe2STim Corringham           AMDGPU::Hwreg::ID_MODE)
2584c4d2fe2STim Corringham         continue;
2594c4d2fe2STim Corringham 
2604c4d2fe2STim Corringham       unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
2614c4d2fe2STim Corringham                         AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
2624c4d2fe2STim Corringham                        1;
2634c4d2fe2STim Corringham       unsigned Offset =
2644c4d2fe2STim Corringham           (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
2654c4d2fe2STim Corringham       unsigned Mask = ((1 << Width) - 1) << Offset;
2664c4d2fe2STim Corringham 
2674c4d2fe2STim Corringham       // If an InsertionPoint is set we will insert a setreg there.
2684c4d2fe2STim Corringham       if (InsertionPoint) {
2694c4d2fe2STim Corringham         insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
2704c4d2fe2STim Corringham         InsertionPoint = nullptr;
2714c4d2fe2STim Corringham       }
2724c4d2fe2STim Corringham       // If this is an immediate then we know the value being set, but if it is
2734c4d2fe2STim Corringham       // not an immediate then we treat the modified bits of the mode register
2744c4d2fe2STim Corringham       // as unknown.
27590777e29SJay Foad       if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
27690777e29SJay Foad           MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
2774c4d2fe2STim Corringham         unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
2784c4d2fe2STim Corringham         unsigned Mode = (Val << Offset) & Mask;
2794c4d2fe2STim Corringham         Status Setreg = Status(Mask, Mode);
2804c4d2fe2STim Corringham         // If we haven't already set the initial requirements for the block we
2814c4d2fe2STim Corringham         // don't need to as the requirements start from this explicit setreg.
2824c4d2fe2STim Corringham         RequirePending = false;
2834c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(Setreg);
2844c4d2fe2STim Corringham       } else {
2854c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
2864c4d2fe2STim Corringham       }
2874c4d2fe2STim Corringham     } else if (!NewInfo->Change.isCompatible(InstrMode)) {
2884c4d2fe2STim Corringham       // This instruction uses the Mode register and its requirements aren't
2894c4d2fe2STim Corringham       // compatible with the current mode.
2904c4d2fe2STim Corringham       if (InsertionPoint) {
2914c4d2fe2STim Corringham         // If the required mode change cannot be included in the current
2924c4d2fe2STim Corringham         // InsertionPoint changes, we need a setreg and start a new
2934c4d2fe2STim Corringham         // InsertionPoint.
2944c4d2fe2STim Corringham         if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
2954c4d2fe2STim Corringham           if (RequirePending) {
2964c4d2fe2STim Corringham             // This is the first insertionPoint in the block so we will defer
2974c4d2fe2STim Corringham             // the insertion of the setreg to Phase 3 where we know whether or
2984c4d2fe2STim Corringham             // not it is actually needed.
2994c4d2fe2STim Corringham             NewInfo->FirstInsertionPoint = InsertionPoint;
3004c4d2fe2STim Corringham             NewInfo->Require = NewInfo->Change;
3014c4d2fe2STim Corringham             RequirePending = false;
3024c4d2fe2STim Corringham           } else {
3034c4d2fe2STim Corringham             insertSetreg(MBB, InsertionPoint, TII,
3044c4d2fe2STim Corringham                          IPChange.delta(NewInfo->Change));
3054c4d2fe2STim Corringham             IPChange = NewInfo->Change;
3064c4d2fe2STim Corringham           }
3074c4d2fe2STim Corringham           // Set the new InsertionPoint
3084c4d2fe2STim Corringham           InsertionPoint = &MI;
3094c4d2fe2STim Corringham         }
3104c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(InstrMode);
3114c4d2fe2STim Corringham       } else {
3124c4d2fe2STim Corringham         // No InsertionPoint is currently set - this is either the first in
3134c4d2fe2STim Corringham         // the block or we have previously seen an explicit setreg.
3144c4d2fe2STim Corringham         InsertionPoint = &MI;
3154c4d2fe2STim Corringham         IPChange = NewInfo->Change;
3164c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(InstrMode);
3174c4d2fe2STim Corringham       }
3184c4d2fe2STim Corringham     }
3194c4d2fe2STim Corringham   }
3204c4d2fe2STim Corringham   if (RequirePending) {
3214c4d2fe2STim Corringham     // If we haven't yet set the initial requirements for the block we set them
3224c4d2fe2STim Corringham     // now.
3234c4d2fe2STim Corringham     NewInfo->FirstInsertionPoint = InsertionPoint;
3244c4d2fe2STim Corringham     NewInfo->Require = NewInfo->Change;
3254c4d2fe2STim Corringham   } else if (InsertionPoint) {
3264c4d2fe2STim Corringham     // We need to insert a setreg at the InsertionPoint
3274c4d2fe2STim Corringham     insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
3284c4d2fe2STim Corringham   }
3294c4d2fe2STim Corringham   NewInfo->Exit = NewInfo->Change;
3304c4d2fe2STim Corringham   BlockInfo[MBB.getNumber()] = std::move(NewInfo);
3314c4d2fe2STim Corringham }
3324c4d2fe2STim Corringham 
3334c4d2fe2STim Corringham // In Phase 2 we revisit each block and calculate the common Mode register
3344c4d2fe2STim Corringham // value provided by all predecessor blocks. If the Exit value for the block
3354c4d2fe2STim Corringham // is changed, then we add the successor blocks to the worklist so that the
3364c4d2fe2STim Corringham // exit value is propagated.
3374c4d2fe2STim Corringham void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
3384c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
339c3b3b999STim Corringham   bool RevisitRequired = false;
340c3b3b999STim Corringham   bool ExitSet = false;
3414c4d2fe2STim Corringham   unsigned ThisBlock = MBB.getNumber();
3424c4d2fe2STim Corringham   if (MBB.pred_empty()) {
3434c4d2fe2STim Corringham     // There are no predecessors, so use the default starting status.
3444c4d2fe2STim Corringham     BlockInfo[ThisBlock]->Pred = DefaultStatus;
345c3b3b999STim Corringham     ExitSet = true;
3464c4d2fe2STim Corringham   } else {
3474c4d2fe2STim Corringham     // Build a status that is common to all the predecessors by intersecting
3484c4d2fe2STim Corringham     // all the predecessor exit status values.
349c3b3b999STim Corringham     // Mask bits (which represent the Mode bits with a known value) can only be
350c3b3b999STim Corringham     // added by explicit SETREG instructions or the initial default value -
351c3b3b999STim Corringham     // the intersection process may remove Mask bits.
352c3b3b999STim Corringham     // If we find a predecessor that has not yet had an exit value determined
353c3b3b999STim Corringham     // (this can happen for example if a block is its own predecessor) we defer
354c3b3b999STim Corringham     // use of that value as the Mask will be all zero, and we will revisit this
355c3b3b999STim Corringham     // block again later (unless the only predecessor without an exit value is
356c3b3b999STim Corringham     // this block).
3574c4d2fe2STim Corringham     MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
3584c4d2fe2STim Corringham     MachineBasicBlock &PB = *(*P);
359c3b3b999STim Corringham     unsigned PredBlock = PB.getNumber();
360c3b3b999STim Corringham     if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
361c3b3b999STim Corringham       BlockInfo[ThisBlock]->Pred = DefaultStatus;
362c3b3b999STim Corringham       ExitSet = true;
363c3b3b999STim Corringham     } else if (BlockInfo[PredBlock]->ExitSet) {
364c3b3b999STim Corringham       BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
365c3b3b999STim Corringham       ExitSet = true;
366c3b3b999STim Corringham     } else if (PredBlock != ThisBlock)
367c3b3b999STim Corringham       RevisitRequired = true;
3684c4d2fe2STim Corringham 
3694c4d2fe2STim Corringham     for (P = std::next(P); P != E; P = std::next(P)) {
3704c4d2fe2STim Corringham       MachineBasicBlock *Pred = *P;
371c3b3b999STim Corringham       unsigned PredBlock = Pred->getNumber();
372c3b3b999STim Corringham       if (BlockInfo[PredBlock]->ExitSet) {
373c3b3b999STim Corringham         if (BlockInfo[ThisBlock]->ExitSet) {
374c3b3b999STim Corringham           BlockInfo[ThisBlock]->Pred =
375c3b3b999STim Corringham               BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
376c3b3b999STim Corringham         } else {
377c3b3b999STim Corringham           BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
378c3b3b999STim Corringham         }
379c3b3b999STim Corringham         ExitSet = true;
380c3b3b999STim Corringham       } else if (PredBlock != ThisBlock)
381c3b3b999STim Corringham         RevisitRequired = true;
3824c4d2fe2STim Corringham     }
3834c4d2fe2STim Corringham   }
38496ecead5STim Corringham   Status TmpStatus =
38596ecead5STim Corringham       BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
3864c4d2fe2STim Corringham   if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
3874c4d2fe2STim Corringham     BlockInfo[ThisBlock]->Exit = TmpStatus;
3884c4d2fe2STim Corringham     // Add the successors to the work list so we can propagate the changed exit
3894c4d2fe2STim Corringham     // status.
390e4bab218SKazu Hirata     for (MachineBasicBlock *Succ : MBB.successors())
391e4bab218SKazu Hirata       Phase2List.push(Succ);
3924c4d2fe2STim Corringham   }
393c3b3b999STim Corringham   BlockInfo[ThisBlock]->ExitSet = ExitSet;
394c3b3b999STim Corringham   if (RevisitRequired)
395c3b3b999STim Corringham     Phase2List.push(&MBB);
3964c4d2fe2STim Corringham }
3974c4d2fe2STim Corringham 
3984c4d2fe2STim Corringham // In Phase 3 we revisit each block and if it has an insertion point defined we
3994c4d2fe2STim Corringham // check whether the predecessor mode meets the block's entry requirements. If
4004c4d2fe2STim Corringham // not we insert an appropriate setreg instruction to modify the Mode register.
4014c4d2fe2STim Corringham void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
4024c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
4034c4d2fe2STim Corringham   unsigned ThisBlock = MBB.getNumber();
4044c4d2fe2STim Corringham   if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
40596ecead5STim Corringham     Status Delta =
40696ecead5STim Corringham         BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
4074c4d2fe2STim Corringham     if (BlockInfo[ThisBlock]->FirstInsertionPoint)
4084c4d2fe2STim Corringham       insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
4094c4d2fe2STim Corringham     else
4104c4d2fe2STim Corringham       insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
4114c4d2fe2STim Corringham   }
4124c4d2fe2STim Corringham }
4134c4d2fe2STim Corringham 
4144c4d2fe2STim Corringham bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
4154c4d2fe2STim Corringham   BlockInfo.resize(MF.getNumBlockIDs());
4164c4d2fe2STim Corringham   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4174c4d2fe2STim Corringham   const SIInstrInfo *TII = ST.getInstrInfo();
4184c4d2fe2STim Corringham 
4194c4d2fe2STim Corringham   // Processing is performed in a number of phases
4204c4d2fe2STim Corringham 
4214c4d2fe2STim Corringham   // Phase 1 - determine the initial mode required by each block, and add setreg
4224c4d2fe2STim Corringham   // instructions for intra block requirements.
4234c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
4244c4d2fe2STim Corringham     processBlockPhase1(BB, TII);
4254c4d2fe2STim Corringham 
4264c4d2fe2STim Corringham   // Phase 2 - determine the exit mode from each block. We add all blocks to the
4274c4d2fe2STim Corringham   // list here, but will also add any that need to be revisited during Phase 2
4284c4d2fe2STim Corringham   // processing.
4294c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
4304c4d2fe2STim Corringham     Phase2List.push(&BB);
4314c4d2fe2STim Corringham   while (!Phase2List.empty()) {
4324c4d2fe2STim Corringham     processBlockPhase2(*Phase2List.front(), TII);
4334c4d2fe2STim Corringham     Phase2List.pop();
4344c4d2fe2STim Corringham   }
4354c4d2fe2STim Corringham 
4364c4d2fe2STim Corringham   // Phase 3 - add an initial setreg to each block where the required entry mode
4374c4d2fe2STim Corringham   // is not satisfied by the exit mode of all its predecessors.
4384c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
4394c4d2fe2STim Corringham     processBlockPhase3(BB, TII);
4404c4d2fe2STim Corringham 
4414c4d2fe2STim Corringham   BlockInfo.clear();
4424c4d2fe2STim Corringham 
443c3b3b999STim Corringham   return Changed;
4444c4d2fe2STim Corringham }
445