14c4d2fe2STim Corringham //===-- SIModeRegister.cpp - Mode Register --------------------------------===// 24c4d2fe2STim Corringham // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64c4d2fe2STim Corringham // 74c4d2fe2STim Corringham //===----------------------------------------------------------------------===// 84c4d2fe2STim Corringham /// \file 94c4d2fe2STim Corringham /// This pass inserts changes to the Mode register settings as required. 104c4d2fe2STim Corringham /// Note that currently it only deals with the Double Precision Floating Point 114c4d2fe2STim Corringham /// rounding mode setting, but is intended to be generic enough to be easily 124c4d2fe2STim Corringham /// expanded. 134c4d2fe2STim Corringham /// 144c4d2fe2STim Corringham //===----------------------------------------------------------------------===// 154c4d2fe2STim Corringham // 164c4d2fe2STim Corringham #include "AMDGPU.h" 17560d7e04Sdfukalov #include "GCNSubtarget.h" 18560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 194c4d2fe2STim Corringham #include "llvm/ADT/Statistic.h" 204c4d2fe2STim Corringham #include <queue> 214c4d2fe2STim Corringham 224c4d2fe2STim Corringham #define DEBUG_TYPE "si-mode-register" 234c4d2fe2STim Corringham 244c4d2fe2STim Corringham STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted."); 254c4d2fe2STim Corringham 264c4d2fe2STim Corringham using namespace llvm; 274c4d2fe2STim Corringham 284c4d2fe2STim Corringham struct Status { 294c4d2fe2STim Corringham // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a 304c4d2fe2STim Corringham // known value 314c4d2fe2STim Corringham unsigned Mask; 324c4d2fe2STim Corringham unsigned Mode; 334c4d2fe2STim Corringham 344c4d2fe2STim Corringham Status() : Mask(0), Mode(0){}; 354c4d2fe2STim Corringham 36be9ade93SSimon Pilgrim Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) { 374c4d2fe2STim Corringham Mode &= Mask; 384c4d2fe2STim Corringham }; 394c4d2fe2STim Corringham 404c4d2fe2STim Corringham // merge two status values such that only values that don't conflict are 414c4d2fe2STim Corringham // preserved 424c4d2fe2STim Corringham Status merge(const Status &S) const { 434c4d2fe2STim Corringham return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask))); 444c4d2fe2STim Corringham } 454c4d2fe2STim Corringham 464c4d2fe2STim Corringham // merge an unknown value by using the unknown value's mask to remove bits 474c4d2fe2STim Corringham // from the result 484c4d2fe2STim Corringham Status mergeUnknown(unsigned newMask) { 494c4d2fe2STim Corringham return Status(Mask & ~newMask, Mode & ~newMask); 504c4d2fe2STim Corringham } 514c4d2fe2STim Corringham 524c4d2fe2STim Corringham // intersect two Status values to produce a mode and mask that is a subset 534c4d2fe2STim Corringham // of both values 544c4d2fe2STim Corringham Status intersect(const Status &S) const { 554c4d2fe2STim Corringham unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode); 564c4d2fe2STim Corringham unsigned NewMode = (Mode & NewMask); 574c4d2fe2STim Corringham return Status(NewMask, NewMode); 584c4d2fe2STim Corringham } 594c4d2fe2STim Corringham 604c4d2fe2STim Corringham // produce the delta required to change the Mode to the required Mode 614c4d2fe2STim Corringham Status delta(const Status &S) const { 624c4d2fe2STim Corringham return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode); 634c4d2fe2STim Corringham } 644c4d2fe2STim Corringham 654c4d2fe2STim Corringham bool operator==(const Status &S) const { 664c4d2fe2STim Corringham return (Mask == S.Mask) && (Mode == S.Mode); 674c4d2fe2STim Corringham } 684c4d2fe2STim Corringham 694c4d2fe2STim Corringham bool operator!=(const Status &S) const { return !(*this == S); } 704c4d2fe2STim Corringham 714c4d2fe2STim Corringham bool isCompatible(Status &S) { 724c4d2fe2STim Corringham return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode); 734c4d2fe2STim Corringham } 744c4d2fe2STim Corringham 7596ecead5STim Corringham bool isCombinable(Status &S) { return !(Mask & S.Mask) || isCompatible(S); } 764c4d2fe2STim Corringham }; 774c4d2fe2STim Corringham 784c4d2fe2STim Corringham class BlockData { 794c4d2fe2STim Corringham public: 804c4d2fe2STim Corringham // The Status that represents the mode register settings required by the 814c4d2fe2STim Corringham // FirstInsertionPoint (if any) in this block. Calculated in Phase 1. 824c4d2fe2STim Corringham Status Require; 834c4d2fe2STim Corringham 844c4d2fe2STim Corringham // The Status that represents the net changes to the Mode register made by 854c4d2fe2STim Corringham // this block, Calculated in Phase 1. 864c4d2fe2STim Corringham Status Change; 874c4d2fe2STim Corringham 884c4d2fe2STim Corringham // The Status that represents the mode register settings on exit from this 894c4d2fe2STim Corringham // block. Calculated in Phase 2. 904c4d2fe2STim Corringham Status Exit; 914c4d2fe2STim Corringham 924c4d2fe2STim Corringham // The Status that represents the intersection of exit Mode register settings 934c4d2fe2STim Corringham // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3. 944c4d2fe2STim Corringham Status Pred; 954c4d2fe2STim Corringham 964c4d2fe2STim Corringham // In Phase 1 we record the first instruction that has a mode requirement, 974c4d2fe2STim Corringham // which is used in Phase 3 if we need to insert a mode change. 984c4d2fe2STim Corringham MachineInstr *FirstInsertionPoint; 994c4d2fe2STim Corringham 100c3b3b999STim Corringham // A flag to indicate whether an Exit value has been set (we can't tell by 101c3b3b999STim Corringham // examining the Exit value itself as all values may be valid results). 102c3b3b999STim Corringham bool ExitSet; 103c3b3b999STim Corringham 104c3b3b999STim Corringham BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){}; 1054c4d2fe2STim Corringham }; 1064c4d2fe2STim Corringham 1074c4d2fe2STim Corringham namespace { 1084c4d2fe2STim Corringham 1094c4d2fe2STim Corringham class SIModeRegister : public MachineFunctionPass { 1104c4d2fe2STim Corringham public: 1114c4d2fe2STim Corringham static char ID; 1124c4d2fe2STim Corringham 1134c4d2fe2STim Corringham std::vector<std::unique_ptr<BlockData>> BlockInfo; 1144c4d2fe2STim Corringham std::queue<MachineBasicBlock *> Phase2List; 1154c4d2fe2STim Corringham 1164c4d2fe2STim Corringham // The default mode register setting currently only caters for the floating 1174c4d2fe2STim Corringham // point double precision rounding mode. 1184c4d2fe2STim Corringham // We currently assume the default rounding mode is Round to Nearest 1194c4d2fe2STim Corringham // NOTE: this should come from a per function rounding mode setting once such 1204c4d2fe2STim Corringham // a setting exists. 1214c4d2fe2STim Corringham unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST; 1224c4d2fe2STim Corringham Status DefaultStatus = 1234c4d2fe2STim Corringham Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); 1244c4d2fe2STim Corringham 125c3b3b999STim Corringham bool Changed = false; 126c3b3b999STim Corringham 1274c4d2fe2STim Corringham public: 1284c4d2fe2STim Corringham SIModeRegister() : MachineFunctionPass(ID) {} 1294c4d2fe2STim Corringham 1304c4d2fe2STim Corringham bool runOnMachineFunction(MachineFunction &MF) override; 1314c4d2fe2STim Corringham 1324c4d2fe2STim Corringham void getAnalysisUsage(AnalysisUsage &AU) const override { 1334c4d2fe2STim Corringham AU.setPreservesCFG(); 1344c4d2fe2STim Corringham MachineFunctionPass::getAnalysisUsage(AU); 1354c4d2fe2STim Corringham } 1364c4d2fe2STim Corringham 1374c4d2fe2STim Corringham void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1384c4d2fe2STim Corringham 1394c4d2fe2STim Corringham void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1404c4d2fe2STim Corringham 1414c4d2fe2STim Corringham void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1424c4d2fe2STim Corringham 1434c4d2fe2STim Corringham Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII); 1444c4d2fe2STim Corringham 1454c4d2fe2STim Corringham void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I, 1464c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode); 1474c4d2fe2STim Corringham }; 1484c4d2fe2STim Corringham } // End anonymous namespace. 1494c4d2fe2STim Corringham 1504c4d2fe2STim Corringham INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE, 1514c4d2fe2STim Corringham "Insert required mode register values", false, false) 1524c4d2fe2STim Corringham 1534c4d2fe2STim Corringham char SIModeRegister::ID = 0; 1544c4d2fe2STim Corringham 1554c4d2fe2STim Corringham char &llvm::SIModeRegisterID = SIModeRegister::ID; 1564c4d2fe2STim Corringham 1574c4d2fe2STim Corringham FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); } 1584c4d2fe2STim Corringham 1594c4d2fe2STim Corringham // Determine the Mode register setting required for this instruction. 1604c4d2fe2STim Corringham // Instructions which don't use the Mode register return a null Status. 1614c4d2fe2STim Corringham // Note this currently only deals with instructions that use the floating point 1624c4d2fe2STim Corringham // double precision setting. 1634c4d2fe2STim Corringham Status SIModeRegister::getInstructionMode(MachineInstr &MI, 1644c4d2fe2STim Corringham const SIInstrInfo *TII) { 165*dcb2da13SJulien Pages if (TII->usesFPDPRounding(MI) || 166*dcb2da13SJulien Pages MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO || 167*dcb2da13SJulien Pages MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) { 1684c4d2fe2STim Corringham switch (MI.getOpcode()) { 1694c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LL_F16: 1704c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LV_F16: 1714c4d2fe2STim Corringham case AMDGPU::V_INTERP_P2_F16: 1724c4d2fe2STim Corringham // f16 interpolation instructions need double precision round to zero 1734c4d2fe2STim Corringham return Status(FP_ROUND_MODE_DP(3), 1744c4d2fe2STim Corringham FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)); 175*dcb2da13SJulien Pages case AMDGPU::FPTRUNC_UPWARD_PSEUDO: { 176*dcb2da13SJulien Pages // Replacing the pseudo by a real instruction 177*dcb2da13SJulien Pages MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); 178*dcb2da13SJulien Pages return Status(FP_ROUND_MODE_DP(3), 179*dcb2da13SJulien Pages FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF)); 180*dcb2da13SJulien Pages } 181*dcb2da13SJulien Pages case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: { 182*dcb2da13SJulien Pages // Replacing the pseudo by a real instruction 183*dcb2da13SJulien Pages MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32)); 184*dcb2da13SJulien Pages return Status(FP_ROUND_MODE_DP(3), 185*dcb2da13SJulien Pages FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF)); 186*dcb2da13SJulien Pages } 1874c4d2fe2STim Corringham default: 1884c4d2fe2STim Corringham return DefaultStatus; 1894c4d2fe2STim Corringham } 1904c4d2fe2STim Corringham } 1914c4d2fe2STim Corringham return Status(); 1924c4d2fe2STim Corringham } 1934c4d2fe2STim Corringham 1944c4d2fe2STim Corringham // Insert a setreg instruction to update the Mode register. 1954c4d2fe2STim Corringham // It is possible (though unlikely) for an instruction to require a change to 1964c4d2fe2STim Corringham // the value of disjoint parts of the Mode register when we don't know the 1974c4d2fe2STim Corringham // value of the intervening bits. In that case we need to use more than one 1984c4d2fe2STim Corringham // setreg instruction. 1994c4d2fe2STim Corringham void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, 2004c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode) { 2014c4d2fe2STim Corringham while (InstrMode.Mask) { 2024c4d2fe2STim Corringham unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask); 2034c4d2fe2STim Corringham unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset); 2044c4d2fe2STim Corringham unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); 2055a667c0eSKazu Hirata BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32)) 2064c4d2fe2STim Corringham .addImm(Value) 2074c4d2fe2STim Corringham .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) | 2084c4d2fe2STim Corringham (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | 2094c4d2fe2STim Corringham (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_)); 2104c4d2fe2STim Corringham ++NumSetregInserted; 211c3b3b999STim Corringham Changed = true; 2122faadb15STim Corringham InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); 2134c4d2fe2STim Corringham } 2144c4d2fe2STim Corringham } 2154c4d2fe2STim Corringham 2164c4d2fe2STim Corringham // In Phase 1 we iterate through the instructions of the block and for each 2174c4d2fe2STim Corringham // instruction we get its mode usage. If the instruction uses the Mode register 2184c4d2fe2STim Corringham // we: 2194c4d2fe2STim Corringham // - update the Change status, which tracks the changes to the Mode register 2204c4d2fe2STim Corringham // made by this block 2214c4d2fe2STim Corringham // - if this instruction's requirements are compatible with the current setting 2224c4d2fe2STim Corringham // of the Mode register we merge the modes 2234c4d2fe2STim Corringham // - if it isn't compatible and an InsertionPoint isn't set, then we set the 2244c4d2fe2STim Corringham // InsertionPoint to the current instruction, and we remember the current 2254c4d2fe2STim Corringham // mode 2264c4d2fe2STim Corringham // - if it isn't compatible and InsertionPoint is set we insert a seteg before 2274c4d2fe2STim Corringham // that instruction (unless this instruction forms part of the block's 2284c4d2fe2STim Corringham // entry requirements in which case the insertion is deferred until Phase 3 2294c4d2fe2STim Corringham // when predecessor exit values are known), and move the insertion point to 2304c4d2fe2STim Corringham // this instruction 2314c4d2fe2STim Corringham // - if this is a setreg instruction we treat it as an incompatible instruction. 2324c4d2fe2STim Corringham // This is sub-optimal but avoids some nasty corner cases, and is expected to 2334c4d2fe2STim Corringham // occur very rarely. 2344c4d2fe2STim Corringham // - on exit we have set the Require, Change, and initial Exit modes. 2354c4d2fe2STim Corringham void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, 2364c4d2fe2STim Corringham const SIInstrInfo *TII) { 2370eaee545SJonas Devlieghere auto NewInfo = std::make_unique<BlockData>(); 2384c4d2fe2STim Corringham MachineInstr *InsertionPoint = nullptr; 2394c4d2fe2STim Corringham // RequirePending is used to indicate whether we are collecting the initial 2404c4d2fe2STim Corringham // requirements for the block, and need to defer the first InsertionPoint to 2414c4d2fe2STim Corringham // Phase 3. It is set to false once we have set FirstInsertionPoint, or when 242d1f45ed5SNeubauer, Sebastian // we discover an explicit setreg that means this block doesn't have any 2434c4d2fe2STim Corringham // initial requirements. 2444c4d2fe2STim Corringham bool RequirePending = true; 2454c4d2fe2STim Corringham Status IPChange; 2464c4d2fe2STim Corringham for (MachineInstr &MI : MBB) { 2474c4d2fe2STim Corringham Status InstrMode = getInstructionMode(MI, TII); 24890777e29SJay Foad if (MI.getOpcode() == AMDGPU::S_SETREG_B32 || 24990777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_B32_mode || 25090777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || 25190777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { 2524c4d2fe2STim Corringham // We preserve any explicit mode register setreg instruction we encounter, 2534c4d2fe2STim Corringham // as we assume it has been inserted by a higher authority (this is 2544c4d2fe2STim Corringham // likely to be a very rare occurrence). 2554c4d2fe2STim Corringham unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); 2564c4d2fe2STim Corringham if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) != 2574c4d2fe2STim Corringham AMDGPU::Hwreg::ID_MODE) 2584c4d2fe2STim Corringham continue; 2594c4d2fe2STim Corringham 2604c4d2fe2STim Corringham unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >> 2614c4d2fe2STim Corringham AMDGPU::Hwreg::WIDTH_M1_SHIFT_) + 2624c4d2fe2STim Corringham 1; 2634c4d2fe2STim Corringham unsigned Offset = 2644c4d2fe2STim Corringham (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_; 2654c4d2fe2STim Corringham unsigned Mask = ((1 << Width) - 1) << Offset; 2664c4d2fe2STim Corringham 2674c4d2fe2STim Corringham // If an InsertionPoint is set we will insert a setreg there. 2684c4d2fe2STim Corringham if (InsertionPoint) { 2694c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 2704c4d2fe2STim Corringham InsertionPoint = nullptr; 2714c4d2fe2STim Corringham } 2724c4d2fe2STim Corringham // If this is an immediate then we know the value being set, but if it is 2734c4d2fe2STim Corringham // not an immediate then we treat the modified bits of the mode register 2744c4d2fe2STim Corringham // as unknown. 27590777e29SJay Foad if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 || 27690777e29SJay Foad MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) { 2774c4d2fe2STim Corringham unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); 2784c4d2fe2STim Corringham unsigned Mode = (Val << Offset) & Mask; 2794c4d2fe2STim Corringham Status Setreg = Status(Mask, Mode); 2804c4d2fe2STim Corringham // If we haven't already set the initial requirements for the block we 2814c4d2fe2STim Corringham // don't need to as the requirements start from this explicit setreg. 2824c4d2fe2STim Corringham RequirePending = false; 2834c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(Setreg); 2844c4d2fe2STim Corringham } else { 2854c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.mergeUnknown(Mask); 2864c4d2fe2STim Corringham } 2874c4d2fe2STim Corringham } else if (!NewInfo->Change.isCompatible(InstrMode)) { 2884c4d2fe2STim Corringham // This instruction uses the Mode register and its requirements aren't 2894c4d2fe2STim Corringham // compatible with the current mode. 2904c4d2fe2STim Corringham if (InsertionPoint) { 2914c4d2fe2STim Corringham // If the required mode change cannot be included in the current 2924c4d2fe2STim Corringham // InsertionPoint changes, we need a setreg and start a new 2934c4d2fe2STim Corringham // InsertionPoint. 2944c4d2fe2STim Corringham if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) { 2954c4d2fe2STim Corringham if (RequirePending) { 2964c4d2fe2STim Corringham // This is the first insertionPoint in the block so we will defer 2974c4d2fe2STim Corringham // the insertion of the setreg to Phase 3 where we know whether or 2984c4d2fe2STim Corringham // not it is actually needed. 2994c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint; 3004c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change; 3014c4d2fe2STim Corringham RequirePending = false; 3024c4d2fe2STim Corringham } else { 3034c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, 3044c4d2fe2STim Corringham IPChange.delta(NewInfo->Change)); 3054c4d2fe2STim Corringham IPChange = NewInfo->Change; 3064c4d2fe2STim Corringham } 3074c4d2fe2STim Corringham // Set the new InsertionPoint 3084c4d2fe2STim Corringham InsertionPoint = &MI; 3094c4d2fe2STim Corringham } 3104c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode); 3114c4d2fe2STim Corringham } else { 3124c4d2fe2STim Corringham // No InsertionPoint is currently set - this is either the first in 3134c4d2fe2STim Corringham // the block or we have previously seen an explicit setreg. 3144c4d2fe2STim Corringham InsertionPoint = &MI; 3154c4d2fe2STim Corringham IPChange = NewInfo->Change; 3164c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode); 3174c4d2fe2STim Corringham } 3184c4d2fe2STim Corringham } 3194c4d2fe2STim Corringham } 3204c4d2fe2STim Corringham if (RequirePending) { 3214c4d2fe2STim Corringham // If we haven't yet set the initial requirements for the block we set them 3224c4d2fe2STim Corringham // now. 3234c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint; 3244c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change; 3254c4d2fe2STim Corringham } else if (InsertionPoint) { 3264c4d2fe2STim Corringham // We need to insert a setreg at the InsertionPoint 3274c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 3284c4d2fe2STim Corringham } 3294c4d2fe2STim Corringham NewInfo->Exit = NewInfo->Change; 3304c4d2fe2STim Corringham BlockInfo[MBB.getNumber()] = std::move(NewInfo); 3314c4d2fe2STim Corringham } 3324c4d2fe2STim Corringham 3334c4d2fe2STim Corringham // In Phase 2 we revisit each block and calculate the common Mode register 3344c4d2fe2STim Corringham // value provided by all predecessor blocks. If the Exit value for the block 3354c4d2fe2STim Corringham // is changed, then we add the successor blocks to the worklist so that the 3364c4d2fe2STim Corringham // exit value is propagated. 3374c4d2fe2STim Corringham void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, 3384c4d2fe2STim Corringham const SIInstrInfo *TII) { 339c3b3b999STim Corringham bool RevisitRequired = false; 340c3b3b999STim Corringham bool ExitSet = false; 3414c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber(); 3424c4d2fe2STim Corringham if (MBB.pred_empty()) { 3434c4d2fe2STim Corringham // There are no predecessors, so use the default starting status. 3444c4d2fe2STim Corringham BlockInfo[ThisBlock]->Pred = DefaultStatus; 345c3b3b999STim Corringham ExitSet = true; 3464c4d2fe2STim Corringham } else { 3474c4d2fe2STim Corringham // Build a status that is common to all the predecessors by intersecting 3484c4d2fe2STim Corringham // all the predecessor exit status values. 349c3b3b999STim Corringham // Mask bits (which represent the Mode bits with a known value) can only be 350c3b3b999STim Corringham // added by explicit SETREG instructions or the initial default value - 351c3b3b999STim Corringham // the intersection process may remove Mask bits. 352c3b3b999STim Corringham // If we find a predecessor that has not yet had an exit value determined 353c3b3b999STim Corringham // (this can happen for example if a block is its own predecessor) we defer 354c3b3b999STim Corringham // use of that value as the Mask will be all zero, and we will revisit this 355c3b3b999STim Corringham // block again later (unless the only predecessor without an exit value is 356c3b3b999STim Corringham // this block). 3574c4d2fe2STim Corringham MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); 3584c4d2fe2STim Corringham MachineBasicBlock &PB = *(*P); 359c3b3b999STim Corringham unsigned PredBlock = PB.getNumber(); 360c3b3b999STim Corringham if ((ThisBlock == PredBlock) && (std::next(P) == E)) { 361c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = DefaultStatus; 362c3b3b999STim Corringham ExitSet = true; 363c3b3b999STim Corringham } else if (BlockInfo[PredBlock]->ExitSet) { 364c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; 365c3b3b999STim Corringham ExitSet = true; 366c3b3b999STim Corringham } else if (PredBlock != ThisBlock) 367c3b3b999STim Corringham RevisitRequired = true; 3684c4d2fe2STim Corringham 3694c4d2fe2STim Corringham for (P = std::next(P); P != E; P = std::next(P)) { 3704c4d2fe2STim Corringham MachineBasicBlock *Pred = *P; 371c3b3b999STim Corringham unsigned PredBlock = Pred->getNumber(); 372c3b3b999STim Corringham if (BlockInfo[PredBlock]->ExitSet) { 373c3b3b999STim Corringham if (BlockInfo[ThisBlock]->ExitSet) { 374c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = 375c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit); 376c3b3b999STim Corringham } else { 377c3b3b999STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; 378c3b3b999STim Corringham } 379c3b3b999STim Corringham ExitSet = true; 380c3b3b999STim Corringham } else if (PredBlock != ThisBlock) 381c3b3b999STim Corringham RevisitRequired = true; 3824c4d2fe2STim Corringham } 3834c4d2fe2STim Corringham } 38496ecead5STim Corringham Status TmpStatus = 38596ecead5STim Corringham BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); 3864c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->Exit != TmpStatus) { 3874c4d2fe2STim Corringham BlockInfo[ThisBlock]->Exit = TmpStatus; 3884c4d2fe2STim Corringham // Add the successors to the work list so we can propagate the changed exit 3894c4d2fe2STim Corringham // status. 390e4bab218SKazu Hirata for (MachineBasicBlock *Succ : MBB.successors()) 391e4bab218SKazu Hirata Phase2List.push(Succ); 3924c4d2fe2STim Corringham } 393c3b3b999STim Corringham BlockInfo[ThisBlock]->ExitSet = ExitSet; 394c3b3b999STim Corringham if (RevisitRequired) 395c3b3b999STim Corringham Phase2List.push(&MBB); 3964c4d2fe2STim Corringham } 3974c4d2fe2STim Corringham 3984c4d2fe2STim Corringham // In Phase 3 we revisit each block and if it has an insertion point defined we 3994c4d2fe2STim Corringham // check whether the predecessor mode meets the block's entry requirements. If 4004c4d2fe2STim Corringham // not we insert an appropriate setreg instruction to modify the Mode register. 4014c4d2fe2STim Corringham void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, 4024c4d2fe2STim Corringham const SIInstrInfo *TII) { 4034c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber(); 4044c4d2fe2STim Corringham if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { 40596ecead5STim Corringham Status Delta = 40696ecead5STim Corringham BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); 4074c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->FirstInsertionPoint) 4084c4d2fe2STim Corringham insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); 4094c4d2fe2STim Corringham else 4104c4d2fe2STim Corringham insertSetreg(MBB, &MBB.instr_front(), TII, Delta); 4114c4d2fe2STim Corringham } 4124c4d2fe2STim Corringham } 4134c4d2fe2STim Corringham 4144c4d2fe2STim Corringham bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) { 4154c4d2fe2STim Corringham BlockInfo.resize(MF.getNumBlockIDs()); 4164c4d2fe2STim Corringham const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 4174c4d2fe2STim Corringham const SIInstrInfo *TII = ST.getInstrInfo(); 4184c4d2fe2STim Corringham 4194c4d2fe2STim Corringham // Processing is performed in a number of phases 4204c4d2fe2STim Corringham 4214c4d2fe2STim Corringham // Phase 1 - determine the initial mode required by each block, and add setreg 4224c4d2fe2STim Corringham // instructions for intra block requirements. 4234c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 4244c4d2fe2STim Corringham processBlockPhase1(BB, TII); 4254c4d2fe2STim Corringham 4264c4d2fe2STim Corringham // Phase 2 - determine the exit mode from each block. We add all blocks to the 4274c4d2fe2STim Corringham // list here, but will also add any that need to be revisited during Phase 2 4284c4d2fe2STim Corringham // processing. 4294c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 4304c4d2fe2STim Corringham Phase2List.push(&BB); 4314c4d2fe2STim Corringham while (!Phase2List.empty()) { 4324c4d2fe2STim Corringham processBlockPhase2(*Phase2List.front(), TII); 4334c4d2fe2STim Corringham Phase2List.pop(); 4344c4d2fe2STim Corringham } 4354c4d2fe2STim Corringham 4364c4d2fe2STim Corringham // Phase 3 - add an initial setreg to each block where the required entry mode 4374c4d2fe2STim Corringham // is not satisfied by the exit mode of all its predecessors. 4384c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 4394c4d2fe2STim Corringham processBlockPhase3(BB, TII); 4404c4d2fe2STim Corringham 4414c4d2fe2STim Corringham BlockInfo.clear(); 4424c4d2fe2STim Corringham 443c3b3b999STim Corringham return Changed; 4444c4d2fe2STim Corringham } 445