14c4d2fe2STim Corringham //===-- SIModeRegister.cpp - Mode Register --------------------------------===//
24c4d2fe2STim Corringham //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64c4d2fe2STim Corringham //
74c4d2fe2STim Corringham //===----------------------------------------------------------------------===//
84c4d2fe2STim Corringham /// \file
94c4d2fe2STim Corringham /// This pass inserts changes to the Mode register settings as required.
104c4d2fe2STim Corringham /// Note that currently it only deals with the Double Precision Floating Point
114c4d2fe2STim Corringham /// rounding mode setting, but is intended to be generic enough to be easily
124c4d2fe2STim Corringham /// expanded.
134c4d2fe2STim Corringham ///
144c4d2fe2STim Corringham //===----------------------------------------------------------------------===//
154c4d2fe2STim Corringham //
164c4d2fe2STim Corringham #include "AMDGPU.h"
17560d7e04Sdfukalov #include "GCNSubtarget.h"
18560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
194c4d2fe2STim Corringham #include "llvm/ADT/Statistic.h"
20*989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFunctionPass.h"
214c4d2fe2STim Corringham #include <queue>
224c4d2fe2STim Corringham
234c4d2fe2STim Corringham #define DEBUG_TYPE "si-mode-register"
244c4d2fe2STim Corringham
254c4d2fe2STim Corringham STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
264c4d2fe2STim Corringham
274c4d2fe2STim Corringham using namespace llvm;
284c4d2fe2STim Corringham
// A partial description of the Mode register: Mask selects the bits whose
// value is known (or required) and Mode holds the value of exactly those
// bits.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value.
  unsigned Mask = 0;
  // Value of the bits selected by Mask. The two-argument constructor clears
  // every bit that lies outside Mask.
  unsigned Mode = 0;

  // The empty status: no bit is known.
  Status() = default;

  // Build a status from a mask and a value; bits of NewMode that are not
  // covered by NewMask are discarded.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  // Merge two status values; where both describe the same bit the value from
  // S wins, all other known bits of either side are preserved.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  // Merge an unknown value by using the unknown value's mask to remove bits
  // from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  // Intersect two Status values to produce a mode and mask that is a subset
  // of both values: only bits known on both sides with equal value survive.
  Status intersect(const Status &S) const {
    // (Mode ^ ~S.Mode) has a '1' wherever the two Mode values agree.
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  // Produce the delta required to change this status into one that satisfies
  // S: the bits of S that either differ from, or are not known in, this
  // status, carrying the values requested by S.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  // True when every bit S constrains is known here and already holds the
  // value S asks for.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  // True when S can be folded into this status without a conflict: either
  // the two masks do not overlap at all, or S is already satisfied.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
784c4d2fe2STim Corringham
794c4d2fe2STim Corringham class BlockData {
804c4d2fe2STim Corringham public:
814c4d2fe2STim Corringham // The Status that represents the mode register settings required by the
824c4d2fe2STim Corringham // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
834c4d2fe2STim Corringham Status Require;
844c4d2fe2STim Corringham
854c4d2fe2STim Corringham // The Status that represents the net changes to the Mode register made by
864c4d2fe2STim Corringham // this block, Calculated in Phase 1.
874c4d2fe2STim Corringham Status Change;
884c4d2fe2STim Corringham
894c4d2fe2STim Corringham // The Status that represents the mode register settings on exit from this
904c4d2fe2STim Corringham // block. Calculated in Phase 2.
914c4d2fe2STim Corringham Status Exit;
924c4d2fe2STim Corringham
934c4d2fe2STim Corringham // The Status that represents the intersection of exit Mode register settings
944c4d2fe2STim Corringham // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
954c4d2fe2STim Corringham Status Pred;
964c4d2fe2STim Corringham
974c4d2fe2STim Corringham // In Phase 1 we record the first instruction that has a mode requirement,
984c4d2fe2STim Corringham // which is used in Phase 3 if we need to insert a mode change.
994c4d2fe2STim Corringham MachineInstr *FirstInsertionPoint;
1004c4d2fe2STim Corringham
101c3b3b999STim Corringham // A flag to indicate whether an Exit value has been set (we can't tell by
102c3b3b999STim Corringham // examining the Exit value itself as all values may be valid results).
103c3b3b999STim Corringham bool ExitSet;
104c3b3b999STim Corringham
BlockData()105c3b3b999STim Corringham BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
1064c4d2fe2STim Corringham };
1074c4d2fe2STim Corringham
1084c4d2fe2STim Corringham namespace {
1094c4d2fe2STim Corringham
1104c4d2fe2STim Corringham class SIModeRegister : public MachineFunctionPass {
1114c4d2fe2STim Corringham public:
1124c4d2fe2STim Corringham static char ID;
1134c4d2fe2STim Corringham
1144c4d2fe2STim Corringham std::vector<std::unique_ptr<BlockData>> BlockInfo;
1154c4d2fe2STim Corringham std::queue<MachineBasicBlock *> Phase2List;
1164c4d2fe2STim Corringham
1174c4d2fe2STim Corringham // The default mode register setting currently only caters for the floating
1184c4d2fe2STim Corringham // point double precision rounding mode.
1194c4d2fe2STim Corringham // We currently assume the default rounding mode is Round to Nearest
1204c4d2fe2STim Corringham // NOTE: this should come from a per function rounding mode setting once such
1214c4d2fe2STim Corringham // a setting exists.
1224c4d2fe2STim Corringham unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST;
1234c4d2fe2STim Corringham Status DefaultStatus =
1244c4d2fe2STim Corringham Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode));
1254c4d2fe2STim Corringham
126c3b3b999STim Corringham bool Changed = false;
127c3b3b999STim Corringham
1284c4d2fe2STim Corringham public:
SIModeRegister()1294c4d2fe2STim Corringham SIModeRegister() : MachineFunctionPass(ID) {}
1304c4d2fe2STim Corringham
1314c4d2fe2STim Corringham bool runOnMachineFunction(MachineFunction &MF) override;
1324c4d2fe2STim Corringham
getAnalysisUsage(AnalysisUsage & AU) const1334c4d2fe2STim Corringham void getAnalysisUsage(AnalysisUsage &AU) const override {
1344c4d2fe2STim Corringham AU.setPreservesCFG();
1354c4d2fe2STim Corringham MachineFunctionPass::getAnalysisUsage(AU);
1364c4d2fe2STim Corringham }
1374c4d2fe2STim Corringham
1384c4d2fe2STim Corringham void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1394c4d2fe2STim Corringham
1404c4d2fe2STim Corringham void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1414c4d2fe2STim Corringham
1424c4d2fe2STim Corringham void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
1434c4d2fe2STim Corringham
1444c4d2fe2STim Corringham Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
1454c4d2fe2STim Corringham
1464c4d2fe2STim Corringham void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
1474c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode);
1484c4d2fe2STim Corringham };
1494c4d2fe2STim Corringham } // End anonymous namespace.
1504c4d2fe2STim Corringham
1514c4d2fe2STim Corringham INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
1524c4d2fe2STim Corringham "Insert required mode register values", false, false)
1534c4d2fe2STim Corringham
1544c4d2fe2STim Corringham char SIModeRegister::ID = 0;
1554c4d2fe2STim Corringham
1564c4d2fe2STim Corringham char &llvm::SIModeRegisterID = SIModeRegister::ID;
1574c4d2fe2STim Corringham
createSIModeRegisterPass()1584c4d2fe2STim Corringham FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
1594c4d2fe2STim Corringham
1604c4d2fe2STim Corringham // Determine the Mode register setting required for this instruction.
1614c4d2fe2STim Corringham // Instructions which don't use the Mode register return a null Status.
1624c4d2fe2STim Corringham // Note this currently only deals with instructions that use the floating point
1634c4d2fe2STim Corringham // double precision setting.
getInstructionMode(MachineInstr & MI,const SIInstrInfo * TII)1644c4d2fe2STim Corringham Status SIModeRegister::getInstructionMode(MachineInstr &MI,
1654c4d2fe2STim Corringham const SIInstrInfo *TII) {
166dcb2da13SJulien Pages if (TII->usesFPDPRounding(MI) ||
167dcb2da13SJulien Pages MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168dcb2da13SJulien Pages MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
1694c4d2fe2STim Corringham switch (MI.getOpcode()) {
1704c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LL_F16:
1714c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LV_F16:
1724c4d2fe2STim Corringham case AMDGPU::V_INTERP_P2_F16:
1734c4d2fe2STim Corringham // f16 interpolation instructions need double precision round to zero
1744c4d2fe2STim Corringham return Status(FP_ROUND_MODE_DP(3),
1754c4d2fe2STim Corringham FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO));
176dcb2da13SJulien Pages case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
177dcb2da13SJulien Pages // Replacing the pseudo by a real instruction
178dcb2da13SJulien Pages MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
179dcb2da13SJulien Pages return Status(FP_ROUND_MODE_DP(3),
180dcb2da13SJulien Pages FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF));
181dcb2da13SJulien Pages }
182dcb2da13SJulien Pages case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
183dcb2da13SJulien Pages // Replacing the pseudo by a real instruction
184dcb2da13SJulien Pages MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
185dcb2da13SJulien Pages return Status(FP_ROUND_MODE_DP(3),
186dcb2da13SJulien Pages FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF));
187dcb2da13SJulien Pages }
1884c4d2fe2STim Corringham default:
1894c4d2fe2STim Corringham return DefaultStatus;
1904c4d2fe2STim Corringham }
1914c4d2fe2STim Corringham }
1924c4d2fe2STim Corringham return Status();
1934c4d2fe2STim Corringham }
1944c4d2fe2STim Corringham
1954c4d2fe2STim Corringham // Insert a setreg instruction to update the Mode register.
1964c4d2fe2STim Corringham // It is possible (though unlikely) for an instruction to require a change to
1974c4d2fe2STim Corringham // the value of disjoint parts of the Mode register when we don't know the
1984c4d2fe2STim Corringham // value of the intervening bits. In that case we need to use more than one
1994c4d2fe2STim Corringham // setreg instruction.
insertSetreg(MachineBasicBlock & MBB,MachineInstr * MI,const SIInstrInfo * TII,Status InstrMode)2004c4d2fe2STim Corringham void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
2014c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode) {
2024c4d2fe2STim Corringham while (InstrMode.Mask) {
2034c4d2fe2STim Corringham unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
2044c4d2fe2STim Corringham unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
2054c4d2fe2STim Corringham unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
2065a667c0eSKazu Hirata BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
2074c4d2fe2STim Corringham .addImm(Value)
2084c4d2fe2STim Corringham .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
2094c4d2fe2STim Corringham (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
2104c4d2fe2STim Corringham (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
2114c4d2fe2STim Corringham ++NumSetregInserted;
212c3b3b999STim Corringham Changed = true;
2132faadb15STim Corringham InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
2144c4d2fe2STim Corringham }
2154c4d2fe2STim Corringham }
2164c4d2fe2STim Corringham
2174c4d2fe2STim Corringham // In Phase 1 we iterate through the instructions of the block and for each
2184c4d2fe2STim Corringham // instruction we get its mode usage. If the instruction uses the Mode register
2194c4d2fe2STim Corringham // we:
2204c4d2fe2STim Corringham // - update the Change status, which tracks the changes to the Mode register
2214c4d2fe2STim Corringham // made by this block
2224c4d2fe2STim Corringham // - if this instruction's requirements are compatible with the current setting
2234c4d2fe2STim Corringham // of the Mode register we merge the modes
2244c4d2fe2STim Corringham // - if it isn't compatible and an InsertionPoint isn't set, then we set the
2254c4d2fe2STim Corringham // InsertionPoint to the current instruction, and we remember the current
2264c4d2fe2STim Corringham // mode
2274c4d2fe2STim Corringham // - if it isn't compatible and InsertionPoint is set we insert a seteg before
2284c4d2fe2STim Corringham // that instruction (unless this instruction forms part of the block's
2294c4d2fe2STim Corringham // entry requirements in which case the insertion is deferred until Phase 3
2304c4d2fe2STim Corringham // when predecessor exit values are known), and move the insertion point to
2314c4d2fe2STim Corringham // this instruction
2324c4d2fe2STim Corringham // - if this is a setreg instruction we treat it as an incompatible instruction.
2334c4d2fe2STim Corringham // This is sub-optimal but avoids some nasty corner cases, and is expected to
2344c4d2fe2STim Corringham // occur very rarely.
2354c4d2fe2STim Corringham // - on exit we have set the Require, Change, and initial Exit modes.
processBlockPhase1(MachineBasicBlock & MBB,const SIInstrInfo * TII)2364c4d2fe2STim Corringham void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
2374c4d2fe2STim Corringham const SIInstrInfo *TII) {
2380eaee545SJonas Devlieghere auto NewInfo = std::make_unique<BlockData>();
2394c4d2fe2STim Corringham MachineInstr *InsertionPoint = nullptr;
2404c4d2fe2STim Corringham // RequirePending is used to indicate whether we are collecting the initial
2414c4d2fe2STim Corringham // requirements for the block, and need to defer the first InsertionPoint to
2424c4d2fe2STim Corringham // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
243d1f45ed5SNeubauer, Sebastian // we discover an explicit setreg that means this block doesn't have any
2444c4d2fe2STim Corringham // initial requirements.
2454c4d2fe2STim Corringham bool RequirePending = true;
2464c4d2fe2STim Corringham Status IPChange;
2474c4d2fe2STim Corringham for (MachineInstr &MI : MBB) {
2484c4d2fe2STim Corringham Status InstrMode = getInstructionMode(MI, TII);
24990777e29SJay Foad if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
25090777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
25190777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
25290777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
2534c4d2fe2STim Corringham // We preserve any explicit mode register setreg instruction we encounter,
2544c4d2fe2STim Corringham // as we assume it has been inserted by a higher authority (this is
2554c4d2fe2STim Corringham // likely to be a very rare occurrence).
2564c4d2fe2STim Corringham unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
2574c4d2fe2STim Corringham if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
2584c4d2fe2STim Corringham AMDGPU::Hwreg::ID_MODE)
2594c4d2fe2STim Corringham continue;
2604c4d2fe2STim Corringham
2614c4d2fe2STim Corringham unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
2624c4d2fe2STim Corringham AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
2634c4d2fe2STim Corringham 1;
2644c4d2fe2STim Corringham unsigned Offset =
2654c4d2fe2STim Corringham (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
2664c4d2fe2STim Corringham unsigned Mask = ((1 << Width) - 1) << Offset;
2674c4d2fe2STim Corringham
2684c4d2fe2STim Corringham // If an InsertionPoint is set we will insert a setreg there.
2694c4d2fe2STim Corringham if (InsertionPoint) {
2704c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
2714c4d2fe2STim Corringham InsertionPoint = nullptr;
2724c4d2fe2STim Corringham }
2734c4d2fe2STim Corringham // If this is an immediate then we know the value being set, but if it is
2744c4d2fe2STim Corringham // not an immediate then we treat the modified bits of the mode register
2754c4d2fe2STim Corringham // as unknown.
27690777e29SJay Foad if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
27790777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
2784c4d2fe2STim Corringham unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
2794c4d2fe2STim Corringham unsigned Mode = (Val << Offset) & Mask;
2804c4d2fe2STim Corringham Status Setreg = Status(Mask, Mode);
2814c4d2fe2STim Corringham // If we haven't already set the initial requirements for the block we
2824c4d2fe2STim Corringham // don't need to as the requirements start from this explicit setreg.
2834c4d2fe2STim Corringham RequirePending = false;
2844c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(Setreg);
2854c4d2fe2STim Corringham } else {
2864c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
2874c4d2fe2STim Corringham }
2884c4d2fe2STim Corringham } else if (!NewInfo->Change.isCompatible(InstrMode)) {
2894c4d2fe2STim Corringham // This instruction uses the Mode register and its requirements aren't
2904c4d2fe2STim Corringham // compatible with the current mode.
2914c4d2fe2STim Corringham if (InsertionPoint) {
2924c4d2fe2STim Corringham // If the required mode change cannot be included in the current
2934c4d2fe2STim Corringham // InsertionPoint changes, we need a setreg and start a new
2944c4d2fe2STim Corringham // InsertionPoint.
2954c4d2fe2STim Corringham if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
2964c4d2fe2STim Corringham if (RequirePending) {
2974c4d2fe2STim Corringham // This is the first insertionPoint in the block so we will defer
2984c4d2fe2STim Corringham // the insertion of the setreg to Phase 3 where we know whether or
2994c4d2fe2STim Corringham // not it is actually needed.
3004c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint;
3014c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change;
3024c4d2fe2STim Corringham RequirePending = false;
3034c4d2fe2STim Corringham } else {
3044c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII,
3054c4d2fe2STim Corringham IPChange.delta(NewInfo->Change));
3064c4d2fe2STim Corringham IPChange = NewInfo->Change;
3074c4d2fe2STim Corringham }
3084c4d2fe2STim Corringham // Set the new InsertionPoint
3094c4d2fe2STim Corringham InsertionPoint = &MI;
3104c4d2fe2STim Corringham }
3114c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode);
3124c4d2fe2STim Corringham } else {
3134c4d2fe2STim Corringham // No InsertionPoint is currently set - this is either the first in
3144c4d2fe2STim Corringham // the block or we have previously seen an explicit setreg.
3154c4d2fe2STim Corringham InsertionPoint = &MI;
3164c4d2fe2STim Corringham IPChange = NewInfo->Change;
3174c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode);
3184c4d2fe2STim Corringham }
3194c4d2fe2STim Corringham }
3204c4d2fe2STim Corringham }
3214c4d2fe2STim Corringham if (RequirePending) {
3224c4d2fe2STim Corringham // If we haven't yet set the initial requirements for the block we set them
3234c4d2fe2STim Corringham // now.
3244c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint;
3254c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change;
3264c4d2fe2STim Corringham } else if (InsertionPoint) {
3274c4d2fe2STim Corringham // We need to insert a setreg at the InsertionPoint
3284c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
3294c4d2fe2STim Corringham }
3304c4d2fe2STim Corringham NewInfo->Exit = NewInfo->Change;
3314c4d2fe2STim Corringham BlockInfo[MBB.getNumber()] = std::move(NewInfo);
3324c4d2fe2STim Corringham }
3334c4d2fe2STim Corringham
3344c4d2fe2STim Corringham // In Phase 2 we revisit each block and calculate the common Mode register
3354c4d2fe2STim Corringham // value provided by all predecessor blocks. If the Exit value for the block
3364c4d2fe2STim Corringham // is changed, then we add the successor blocks to the worklist so that the
3374c4d2fe2STim Corringham // exit value is propagated.
processBlockPhase2(MachineBasicBlock & MBB,const SIInstrInfo * TII)3384c4d2fe2STim Corringham void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
3394c4d2fe2STim Corringham const SIInstrInfo *TII) {
340c3b3b999STim Corringham bool RevisitRequired = false;
341c3b3b999STim Corringham bool ExitSet = false;
3424c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber();
3434c4d2fe2STim Corringham if (MBB.pred_empty()) {
3444c4d2fe2STim Corringham // There are no predecessors, so use the default starting status.
3454c4d2fe2STim Corringham BlockInfo[ThisBlock]->Pred = DefaultStatus;
346c3b3b999STim Corringham ExitSet = true;
3474c4d2fe2STim Corringham } else {
3484c4d2fe2STim Corringham // Build a status that is common to all the predecessors by intersecting
3494c4d2fe2STim Corringham // all the predecessor exit status values.
350c3b3b999STim Corringham // Mask bits (which represent the Mode bits with a known value) can only be
351c3b3b999STim Corringham // added by explicit SETREG instructions or the initial default value -
352c3b3b999STim Corringham // the intersection process may remove Mask bits.
353c3b3b999STim Corringham // If we find a predecessor that has not yet had an exit value determined
354c3b3b999STim Corringham // (this can happen for example if a block is its own predecessor) we defer
355c3b3b999STim Corringham // use of that value as the Mask will be all zero, and we will revisit this
356c3b3b999STim Corringham // block again later (unless the only predecessor without an exit value is
357c3b3b999STim Corringham // this block).
3584c4d2fe2STim Corringham MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
3594c4d2fe2STim Corringham MachineBasicBlock &PB = *(*P);
360c3b3b999STim Corringham unsigned PredBlock = PB.getNumber();
361c3b3b999STim Corringham if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
362c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = DefaultStatus;
363c3b3b999STim Corringham ExitSet = true;
364c3b3b999STim Corringham } else if (BlockInfo[PredBlock]->ExitSet) {
365c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
366c3b3b999STim Corringham ExitSet = true;
367c3b3b999STim Corringham } else if (PredBlock != ThisBlock)
368c3b3b999STim Corringham RevisitRequired = true;
3694c4d2fe2STim Corringham
3704c4d2fe2STim Corringham for (P = std::next(P); P != E; P = std::next(P)) {
3714c4d2fe2STim Corringham MachineBasicBlock *Pred = *P;
372c3b3b999STim Corringham unsigned PredBlock = Pred->getNumber();
373c3b3b999STim Corringham if (BlockInfo[PredBlock]->ExitSet) {
374c3b3b999STim Corringham if (BlockInfo[ThisBlock]->ExitSet) {
375c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred =
376c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
377c3b3b999STim Corringham } else {
378c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
379c3b3b999STim Corringham }
380c3b3b999STim Corringham ExitSet = true;
381c3b3b999STim Corringham } else if (PredBlock != ThisBlock)
382c3b3b999STim Corringham RevisitRequired = true;
3834c4d2fe2STim Corringham }
3844c4d2fe2STim Corringham }
38596ecead5STim Corringham Status TmpStatus =
38696ecead5STim Corringham BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
3874c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
3884c4d2fe2STim Corringham BlockInfo[ThisBlock]->Exit = TmpStatus;
3894c4d2fe2STim Corringham // Add the successors to the work list so we can propagate the changed exit
3904c4d2fe2STim Corringham // status.
391e4bab218SKazu Hirata for (MachineBasicBlock *Succ : MBB.successors())
392e4bab218SKazu Hirata Phase2List.push(Succ);
3934c4d2fe2STim Corringham }
394c3b3b999STim Corringham BlockInfo[ThisBlock]->ExitSet = ExitSet;
395c3b3b999STim Corringham if (RevisitRequired)
396c3b3b999STim Corringham Phase2List.push(&MBB);
3974c4d2fe2STim Corringham }
3984c4d2fe2STim Corringham
3994c4d2fe2STim Corringham // In Phase 3 we revisit each block and if it has an insertion point defined we
4004c4d2fe2STim Corringham // check whether the predecessor mode meets the block's entry requirements. If
4014c4d2fe2STim Corringham // not we insert an appropriate setreg instruction to modify the Mode register.
processBlockPhase3(MachineBasicBlock & MBB,const SIInstrInfo * TII)4024c4d2fe2STim Corringham void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
4034c4d2fe2STim Corringham const SIInstrInfo *TII) {
4044c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber();
4054c4d2fe2STim Corringham if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
40696ecead5STim Corringham Status Delta =
40796ecead5STim Corringham BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
4084c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->FirstInsertionPoint)
4094c4d2fe2STim Corringham insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
4104c4d2fe2STim Corringham else
4114c4d2fe2STim Corringham insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
4124c4d2fe2STim Corringham }
4134c4d2fe2STim Corringham }
4144c4d2fe2STim Corringham
runOnMachineFunction(MachineFunction & MF)4154c4d2fe2STim Corringham bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
4164c4d2fe2STim Corringham BlockInfo.resize(MF.getNumBlockIDs());
4174c4d2fe2STim Corringham const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
4184c4d2fe2STim Corringham const SIInstrInfo *TII = ST.getInstrInfo();
4194c4d2fe2STim Corringham
4204c4d2fe2STim Corringham // Processing is performed in a number of phases
4214c4d2fe2STim Corringham
4224c4d2fe2STim Corringham // Phase 1 - determine the initial mode required by each block, and add setreg
4234c4d2fe2STim Corringham // instructions for intra block requirements.
4244c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF)
4254c4d2fe2STim Corringham processBlockPhase1(BB, TII);
4264c4d2fe2STim Corringham
4274c4d2fe2STim Corringham // Phase 2 - determine the exit mode from each block. We add all blocks to the
4284c4d2fe2STim Corringham // list here, but will also add any that need to be revisited during Phase 2
4294c4d2fe2STim Corringham // processing.
4304c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF)
4314c4d2fe2STim Corringham Phase2List.push(&BB);
4324c4d2fe2STim Corringham while (!Phase2List.empty()) {
4334c4d2fe2STim Corringham processBlockPhase2(*Phase2List.front(), TII);
4344c4d2fe2STim Corringham Phase2List.pop();
4354c4d2fe2STim Corringham }
4364c4d2fe2STim Corringham
4374c4d2fe2STim Corringham // Phase 3 - add an initial setreg to each block where the required entry mode
4384c4d2fe2STim Corringham // is not satisfied by the exit mode of all its predecessors.
4394c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF)
4404c4d2fe2STim Corringham processBlockPhase3(BB, TII);
4414c4d2fe2STim Corringham
4424c4d2fe2STim Corringham BlockInfo.clear();
4434c4d2fe2STim Corringham
444c3b3b999STim Corringham return Changed;
4454c4d2fe2STim Corringham }
446