//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass inserts changes to the Mode register settings as required.
/// Note that currently it only deals with the Double Precision Floating Point
/// rounding mode setting, but is intended to be generic enough to be easily
/// expanded.
///
//===----------------------------------------------------------------------===//
//
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <queue>
324c4d2fe2STim Corringham 
// Name of this pass; also passed to INITIALIZE_PASS further down the file.
#define DEBUG_TYPE "si-mode-register"

// Incremented once for every setreg instruction emitted by insertSetreg().
STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");

using namespace llvm;
384c4d2fe2STim Corringham 
/// Describes what is known (or required) about the Mode register.
///
/// A set bit in \c Mask means the corresponding Mode register bit has a known
/// value, and that value is stored in the same bit position of \c Mode. The
/// constructors clear any bits of \c Mode that lie outside \c Mask.
///
/// All query and combination operations are const and take their argument by
/// const reference, so they can be used on const objects and temporaries.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value
  unsigned Mask = 0;
  unsigned Mode = 0;

  // A default-constructed Status knows nothing about any bit.
  Status() = default;

  // Bits of NewMode outside NewMask are discarded.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // Merge two status values. The result knows every bit known by either
  // operand; where both know a bit, the value from S takes precedence.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: only bits known in both AND holding the same value
  // survive.
  Status intersect(const Status &S) const {
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change the Mode to the required Mode S:
  // the bits S cares about that are either unknown here or differ from S.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True when every bit S cares about is known here and has the value S
  // wants.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True when S touches none of the bits known here, or is compatible with
  // them.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
904c4d2fe2STim Corringham 
914c4d2fe2STim Corringham class BlockData {
924c4d2fe2STim Corringham public:
934c4d2fe2STim Corringham   // The Status that represents the mode register settings required by the
944c4d2fe2STim Corringham   // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
954c4d2fe2STim Corringham   Status Require;
964c4d2fe2STim Corringham 
974c4d2fe2STim Corringham   // The Status that represents the net changes to the Mode register made by
984c4d2fe2STim Corringham   // this block, Calculated in Phase 1.
994c4d2fe2STim Corringham   Status Change;
1004c4d2fe2STim Corringham 
1014c4d2fe2STim Corringham   // The Status that represents the mode register settings on exit from this
1024c4d2fe2STim Corringham   // block. Calculated in Phase 2.
1034c4d2fe2STim Corringham   Status Exit;
1044c4d2fe2STim Corringham 
1054c4d2fe2STim Corringham   // The Status that represents the intersection of exit Mode register settings
1064c4d2fe2STim Corringham   // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
1074c4d2fe2STim Corringham   Status Pred;
1084c4d2fe2STim Corringham 
1094c4d2fe2STim Corringham   // In Phase 1 we record the first instruction that has a mode requirement,
1104c4d2fe2STim Corringham   // which is used in Phase 3 if we need to insert a mode change.
1114c4d2fe2STim Corringham   MachineInstr *FirstInsertionPoint;
1124c4d2fe2STim Corringham 
1134c4d2fe2STim Corringham   BlockData() : FirstInsertionPoint(nullptr) {};
1144c4d2fe2STim Corringham };
1154c4d2fe2STim Corringham 
1164c4d2fe2STim Corringham namespace {
1174c4d2fe2STim Corringham 
1184c4d2fe2STim Corringham class SIModeRegister : public MachineFunctionPass {
1194c4d2fe2STim Corringham public:
1204c4d2fe2STim Corringham   static char ID;
1214c4d2fe2STim Corringham 
1224c4d2fe2STim Corringham   std::vector<std::unique_ptr<BlockData>> BlockInfo;
1234c4d2fe2STim Corringham   std::queue<MachineBasicBlock *> Phase2List;
1244c4d2fe2STim Corringham 
1254c4d2fe2STim Corringham   // The default mode register setting currently only caters for the floating
1264c4d2fe2STim Corringham   // point double precision rounding mode.
1274c4d2fe2STim Corringham   // We currently assume the default rounding mode is Round to Nearest
1284c4d2fe2STim Corringham   // NOTE: this should come from a per function rounding mode setting once such
1294c4d2fe2STim Corringham   // a setting exists.
1304c4d2fe2STim Corringham   unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
1314c4d2fe2STim Corringham   Status DefaultStatus =
1324c4d2fe2STim Corringham       Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
1334c4d2fe2STim Corringham 
1344c4d2fe2STim Corringham public:
1354c4d2fe2STim Corringham   SIModeRegister() : MachineFunctionPass(ID) {}
1364c4d2fe2STim Corringham 
1374c4d2fe2STim Corringham   bool runOnMachineFunction(MachineFunction &MF) override;
1384c4d2fe2STim Corringham 
1394c4d2fe2STim Corringham   void getAnalysisUsage(AnalysisUsage &AU) const override {
1404c4d2fe2STim Corringham     AU.setPreservesCFG();
1414c4d2fe2STim Corringham     MachineFunctionPass::getAnalysisUsage(AU);
1424c4d2fe2STim Corringham   }
1434c4d2fe2STim Corringham 
1444c4d2fe2STim Corringham   void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1454c4d2fe2STim Corringham 
1464c4d2fe2STim Corringham   void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1474c4d2fe2STim Corringham 
1484c4d2fe2STim Corringham   void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1494c4d2fe2STim Corringham 
1504c4d2fe2STim Corringham   Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
1514c4d2fe2STim Corringham 
1524c4d2fe2STim Corringham   void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
1534c4d2fe2STim Corringham                     const SIInstrInfo *TII, Status InstrMode);
1544c4d2fe2STim Corringham };
1554c4d2fe2STim Corringham } // End anonymous namespace.
1564c4d2fe2STim Corringham 
1574c4d2fe2STim Corringham INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
1584c4d2fe2STim Corringham                 "Insert required mode register values", false, false)
1594c4d2fe2STim Corringham 
1604c4d2fe2STim Corringham char SIModeRegister::ID = 0;
1614c4d2fe2STim Corringham 
1624c4d2fe2STim Corringham char &llvm::SIModeRegisterID = SIModeRegister::ID;
1634c4d2fe2STim Corringham 
1644c4d2fe2STim Corringham FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
1654c4d2fe2STim Corringham 
1664c4d2fe2STim Corringham // Determine the Mode register setting required for this instruction.
1674c4d2fe2STim Corringham // Instructions which don't use the Mode register return a null Status.
1684c4d2fe2STim Corringham // Note this currently only deals with instructions that use the floating point
1694c4d2fe2STim Corringham // double precision setting.
1704c4d2fe2STim Corringham Status SIModeRegister::getInstructionMode(MachineInstr &MI,
1714c4d2fe2STim Corringham                                           const SIInstrInfo *TII) {
1724c4d2fe2STim Corringham   if (TII->usesFPDPRounding(MI)) {
1734c4d2fe2STim Corringham     switch (MI.getOpcode()) {
1744c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P1LL_F16:
1754c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P1LV_F16:
1764c4d2fe2STim Corringham     case AMDGPU::V_INTERP_P2_F16:
1774c4d2fe2STim Corringham       // f16 interpolation instructions need double precision round to zero
1784c4d2fe2STim Corringham       return Status(FP_ROUND_MODE_DP(3),
1794c4d2fe2STim Corringham                     FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
1804c4d2fe2STim Corringham     default:
1814c4d2fe2STim Corringham       return DefaultStatus;
1824c4d2fe2STim Corringham     }
1834c4d2fe2STim Corringham   }
1844c4d2fe2STim Corringham   return Status();
1854c4d2fe2STim Corringham }
1864c4d2fe2STim Corringham 
1874c4d2fe2STim Corringham // Insert a setreg instruction to update the Mode register.
1884c4d2fe2STim Corringham // It is possible (though unlikely) for an instruction to require a change to
1894c4d2fe2STim Corringham // the value of disjoint parts of the Mode register when we don't know the
1904c4d2fe2STim Corringham // value of the intervening bits. In that case we need to use more than one
1914c4d2fe2STim Corringham // setreg instruction.
1924c4d2fe2STim Corringham void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
1934c4d2fe2STim Corringham                                   const SIInstrInfo *TII, Status InstrMode) {
1944c4d2fe2STim Corringham   while (InstrMode.Mask) {
1954c4d2fe2STim Corringham     unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
1964c4d2fe2STim Corringham     unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
1974c4d2fe2STim Corringham     unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
1984c4d2fe2STim Corringham     BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32))
1994c4d2fe2STim Corringham         .addImm(Value)
2004c4d2fe2STim Corringham         .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
2014c4d2fe2STim Corringham                 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
2024c4d2fe2STim Corringham                 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
2034c4d2fe2STim Corringham     ++NumSetregInserted;
2042faadb15STim Corringham     InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
2054c4d2fe2STim Corringham   }
2064c4d2fe2STim Corringham }
2074c4d2fe2STim Corringham 
2084c4d2fe2STim Corringham // In Phase 1 we iterate through the instructions of the block and for each
2094c4d2fe2STim Corringham // instruction we get its mode usage. If the instruction uses the Mode register
2104c4d2fe2STim Corringham // we:
2114c4d2fe2STim Corringham // - update the Change status, which tracks the changes to the Mode register
2124c4d2fe2STim Corringham //   made by this block
2134c4d2fe2STim Corringham // - if this instruction's requirements are compatible with the current setting
2144c4d2fe2STim Corringham //   of the Mode register we merge the modes
2154c4d2fe2STim Corringham // - if it isn't compatible and an InsertionPoint isn't set, then we set the
2164c4d2fe2STim Corringham //   InsertionPoint to the current instruction, and we remember the current
2174c4d2fe2STim Corringham //   mode
2184c4d2fe2STim Corringham // - if it isn't compatible and InsertionPoint is set we insert a seteg before
2194c4d2fe2STim Corringham //   that instruction (unless this instruction forms part of the block's
2204c4d2fe2STim Corringham //   entry requirements in which case the insertion is deferred until Phase 3
2214c4d2fe2STim Corringham //   when predecessor exit values are known), and move the insertion point to
2224c4d2fe2STim Corringham //   this instruction
2234c4d2fe2STim Corringham // - if this is a setreg instruction we treat it as an incompatible instruction.
2244c4d2fe2STim Corringham //   This is sub-optimal but avoids some nasty corner cases, and is expected to
2254c4d2fe2STim Corringham //   occur very rarely.
2264c4d2fe2STim Corringham // - on exit we have set the Require, Change, and initial Exit modes.
2274c4d2fe2STim Corringham void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
2284c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
229*0eaee545SJonas Devlieghere   auto NewInfo = std::make_unique<BlockData>();
2304c4d2fe2STim Corringham   MachineInstr *InsertionPoint = nullptr;
2314c4d2fe2STim Corringham   // RequirePending is used to indicate whether we are collecting the initial
2324c4d2fe2STim Corringham   // requirements for the block, and need to defer the first InsertionPoint to
2334c4d2fe2STim Corringham   // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
2344c4d2fe2STim Corringham   // we discover an explict setreg that means this block doesn't have any
2354c4d2fe2STim Corringham   // initial requirements.
2364c4d2fe2STim Corringham   bool RequirePending = true;
2374c4d2fe2STim Corringham   Status IPChange;
2384c4d2fe2STim Corringham   for (MachineInstr &MI : MBB) {
2394c4d2fe2STim Corringham     Status InstrMode = getInstructionMode(MI, TII);
2404c4d2fe2STim Corringham     if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) ||
2414c4d2fe2STim Corringham         (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) {
2424c4d2fe2STim Corringham       // We preserve any explicit mode register setreg instruction we encounter,
2434c4d2fe2STim Corringham       // as we assume it has been inserted by a higher authority (this is
2444c4d2fe2STim Corringham       // likely to be a very rare occurrence).
2454c4d2fe2STim Corringham       unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2464c4d2fe2STim Corringham       if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
2474c4d2fe2STim Corringham           AMDGPU::Hwreg::ID_MODE)
2484c4d2fe2STim Corringham         continue;
2494c4d2fe2STim Corringham 
2504c4d2fe2STim Corringham       unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
2514c4d2fe2STim Corringham                         AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
2524c4d2fe2STim Corringham                        1;
2534c4d2fe2STim Corringham       unsigned Offset =
2544c4d2fe2STim Corringham           (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
2554c4d2fe2STim Corringham       unsigned Mask = ((1 << Width) - 1) << Offset;
2564c4d2fe2STim Corringham 
2574c4d2fe2STim Corringham       // If an InsertionPoint is set we will insert a setreg there.
2584c4d2fe2STim Corringham       if (InsertionPoint) {
2594c4d2fe2STim Corringham         insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
2604c4d2fe2STim Corringham         InsertionPoint = nullptr;
2614c4d2fe2STim Corringham       }
2624c4d2fe2STim Corringham       // If this is an immediate then we know the value being set, but if it is
2634c4d2fe2STim Corringham       // not an immediate then we treat the modified bits of the mode register
2644c4d2fe2STim Corringham       // as unknown.
2654c4d2fe2STim Corringham       if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) {
2664c4d2fe2STim Corringham         unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
2674c4d2fe2STim Corringham         unsigned Mode = (Val << Offset) & Mask;
2684c4d2fe2STim Corringham         Status Setreg = Status(Mask, Mode);
2694c4d2fe2STim Corringham         // If we haven't already set the initial requirements for the block we
2704c4d2fe2STim Corringham         // don't need to as the requirements start from this explicit setreg.
2714c4d2fe2STim Corringham         RequirePending = false;
2724c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(Setreg);
2734c4d2fe2STim Corringham       } else {
2744c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
2754c4d2fe2STim Corringham       }
2764c4d2fe2STim Corringham     } else if (!NewInfo->Change.isCompatible(InstrMode)) {
2774c4d2fe2STim Corringham       // This instruction uses the Mode register and its requirements aren't
2784c4d2fe2STim Corringham       // compatible with the current mode.
2794c4d2fe2STim Corringham       if (InsertionPoint) {
2804c4d2fe2STim Corringham         // If the required mode change cannot be included in the current
2814c4d2fe2STim Corringham         // InsertionPoint changes, we need a setreg and start a new
2824c4d2fe2STim Corringham         // InsertionPoint.
2834c4d2fe2STim Corringham         if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
2844c4d2fe2STim Corringham           if (RequirePending) {
2854c4d2fe2STim Corringham             // This is the first insertionPoint in the block so we will defer
2864c4d2fe2STim Corringham             // the insertion of the setreg to Phase 3 where we know whether or
2874c4d2fe2STim Corringham             // not it is actually needed.
2884c4d2fe2STim Corringham             NewInfo->FirstInsertionPoint = InsertionPoint;
2894c4d2fe2STim Corringham             NewInfo->Require = NewInfo->Change;
2904c4d2fe2STim Corringham             RequirePending = false;
2914c4d2fe2STim Corringham           } else {
2924c4d2fe2STim Corringham             insertSetreg(MBB, InsertionPoint, TII,
2934c4d2fe2STim Corringham                          IPChange.delta(NewInfo->Change));
2944c4d2fe2STim Corringham             IPChange = NewInfo->Change;
2954c4d2fe2STim Corringham           }
2964c4d2fe2STim Corringham           // Set the new InsertionPoint
2974c4d2fe2STim Corringham           InsertionPoint = &MI;
2984c4d2fe2STim Corringham         }
2994c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(InstrMode);
3004c4d2fe2STim Corringham       } else {
3014c4d2fe2STim Corringham         // No InsertionPoint is currently set - this is either the first in
3024c4d2fe2STim Corringham         // the block or we have previously seen an explicit setreg.
3034c4d2fe2STim Corringham         InsertionPoint = &MI;
3044c4d2fe2STim Corringham         IPChange = NewInfo->Change;
3054c4d2fe2STim Corringham         NewInfo->Change = NewInfo->Change.merge(InstrMode);
3064c4d2fe2STim Corringham       }
3074c4d2fe2STim Corringham     }
3084c4d2fe2STim Corringham   }
3094c4d2fe2STim Corringham   if (RequirePending) {
3104c4d2fe2STim Corringham     // If we haven't yet set the initial requirements for the block we set them
3114c4d2fe2STim Corringham     // now.
3124c4d2fe2STim Corringham     NewInfo->FirstInsertionPoint = InsertionPoint;
3134c4d2fe2STim Corringham     NewInfo->Require = NewInfo->Change;
3144c4d2fe2STim Corringham   } else if (InsertionPoint) {
3154c4d2fe2STim Corringham     // We need to insert a setreg at the InsertionPoint
3164c4d2fe2STim Corringham     insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
3174c4d2fe2STim Corringham   }
3184c4d2fe2STim Corringham   NewInfo->Exit = NewInfo->Change;
3194c4d2fe2STim Corringham   BlockInfo[MBB.getNumber()] = std::move(NewInfo);
3204c4d2fe2STim Corringham }
3214c4d2fe2STim Corringham 
3224c4d2fe2STim Corringham // In Phase 2 we revisit each block and calculate the common Mode register
3234c4d2fe2STim Corringham // value provided by all predecessor blocks. If the Exit value for the block
3244c4d2fe2STim Corringham // is changed, then we add the successor blocks to the worklist so that the
3254c4d2fe2STim Corringham // exit value is propagated.
3264c4d2fe2STim Corringham void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
3274c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
3284c4d2fe2STim Corringham //  BlockData *BI = BlockInfo[MBB.getNumber()];
3294c4d2fe2STim Corringham   unsigned ThisBlock = MBB.getNumber();
3304c4d2fe2STim Corringham   if (MBB.pred_empty()) {
3314c4d2fe2STim Corringham     // There are no predecessors, so use the default starting status.
3324c4d2fe2STim Corringham     BlockInfo[ThisBlock]->Pred = DefaultStatus;
3334c4d2fe2STim Corringham   } else {
3344c4d2fe2STim Corringham     // Build a status that is common to all the predecessors by intersecting
3354c4d2fe2STim Corringham     // all the predecessor exit status values.
3364c4d2fe2STim Corringham     MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
3374c4d2fe2STim Corringham     MachineBasicBlock &PB = *(*P);
3384c4d2fe2STim Corringham     BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit;
3394c4d2fe2STim Corringham 
3404c4d2fe2STim Corringham     for (P = std::next(P); P != E; P = std::next(P)) {
3414c4d2fe2STim Corringham       MachineBasicBlock *Pred = *P;
3424c4d2fe2STim Corringham       BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit);
3434c4d2fe2STim Corringham     }
3444c4d2fe2STim Corringham   }
3454c4d2fe2STim Corringham   Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
3464c4d2fe2STim Corringham   if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
3474c4d2fe2STim Corringham     BlockInfo[ThisBlock]->Exit = TmpStatus;
3484c4d2fe2STim Corringham     // Add the successors to the work list so we can propagate the changed exit
3494c4d2fe2STim Corringham     // status.
3504c4d2fe2STim Corringham     for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
3514c4d2fe2STim Corringham                                           E = MBB.succ_end();
3524c4d2fe2STim Corringham          S != E; S = std::next(S)) {
3534c4d2fe2STim Corringham       MachineBasicBlock &B = *(*S);
3544c4d2fe2STim Corringham       Phase2List.push(&B);
3554c4d2fe2STim Corringham     }
3564c4d2fe2STim Corringham   }
3574c4d2fe2STim Corringham }
3584c4d2fe2STim Corringham 
3594c4d2fe2STim Corringham // In Phase 3 we revisit each block and if it has an insertion point defined we
3604c4d2fe2STim Corringham // check whether the predecessor mode meets the block's entry requirements. If
3614c4d2fe2STim Corringham // not we insert an appropriate setreg instruction to modify the Mode register.
3624c4d2fe2STim Corringham void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
3634c4d2fe2STim Corringham                                         const SIInstrInfo *TII) {
3644c4d2fe2STim Corringham //  BlockData *BI = BlockInfo[MBB.getNumber()];
3654c4d2fe2STim Corringham   unsigned ThisBlock = MBB.getNumber();
3664c4d2fe2STim Corringham   if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
3674c4d2fe2STim Corringham     Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
3684c4d2fe2STim Corringham     if (BlockInfo[ThisBlock]->FirstInsertionPoint)
3694c4d2fe2STim Corringham       insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
3704c4d2fe2STim Corringham     else
3714c4d2fe2STim Corringham       insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
3724c4d2fe2STim Corringham   }
3734c4d2fe2STim Corringham }
3744c4d2fe2STim Corringham 
3754c4d2fe2STim Corringham bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
3764c4d2fe2STim Corringham   BlockInfo.resize(MF.getNumBlockIDs());
3774c4d2fe2STim Corringham   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3784c4d2fe2STim Corringham   const SIInstrInfo *TII = ST.getInstrInfo();
3794c4d2fe2STim Corringham 
3804c4d2fe2STim Corringham   // Processing is performed in a number of phases
3814c4d2fe2STim Corringham 
3824c4d2fe2STim Corringham   // Phase 1 - determine the initial mode required by each block, and add setreg
3834c4d2fe2STim Corringham   // instructions for intra block requirements.
3844c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
3854c4d2fe2STim Corringham     processBlockPhase1(BB, TII);
3864c4d2fe2STim Corringham 
3874c4d2fe2STim Corringham   // Phase 2 - determine the exit mode from each block. We add all blocks to the
3884c4d2fe2STim Corringham   // list here, but will also add any that need to be revisited during Phase 2
3894c4d2fe2STim Corringham   // processing.
3904c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
3914c4d2fe2STim Corringham     Phase2List.push(&BB);
3924c4d2fe2STim Corringham   while (!Phase2List.empty()) {
3934c4d2fe2STim Corringham     processBlockPhase2(*Phase2List.front(), TII);
3944c4d2fe2STim Corringham     Phase2List.pop();
3954c4d2fe2STim Corringham   }
3964c4d2fe2STim Corringham 
3974c4d2fe2STim Corringham   // Phase 3 - add an initial setreg to each block where the required entry mode
3984c4d2fe2STim Corringham   // is not satisfied by the exit mode of all its predecessors.
3994c4d2fe2STim Corringham   for (MachineBasicBlock &BB : MF)
4004c4d2fe2STim Corringham     processBlockPhase3(BB, TII);
4014c4d2fe2STim Corringham 
4024c4d2fe2STim Corringham   BlockInfo.clear();
4034c4d2fe2STim Corringham 
4044c4d2fe2STim Corringham   return NumSetregInserted > 0;
4054c4d2fe2STim Corringham }
406