14c4d2fe2STim Corringham //===-- SIModeRegister.cpp - Mode Register --------------------------------===// 24c4d2fe2STim Corringham // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64c4d2fe2STim Corringham // 74c4d2fe2STim Corringham //===----------------------------------------------------------------------===// 84c4d2fe2STim Corringham /// \file 94c4d2fe2STim Corringham /// This pass inserts changes to the Mode register settings as required. 104c4d2fe2STim Corringham /// Note that currently it only deals with the Double Precision Floating Point 114c4d2fe2STim Corringham /// rounding mode setting, but is intended to be generic enough to be easily 124c4d2fe2STim Corringham /// expanded. 134c4d2fe2STim Corringham /// 144c4d2fe2STim Corringham //===----------------------------------------------------------------------===// 154c4d2fe2STim Corringham // 164c4d2fe2STim Corringham #include "AMDGPU.h" 174c4d2fe2STim Corringham #include "AMDGPUInstrInfo.h" 184c4d2fe2STim Corringham #include "AMDGPUSubtarget.h" 194c4d2fe2STim Corringham #include "SIInstrInfo.h" 204c4d2fe2STim Corringham #include "SIMachineFunctionInfo.h" 214c4d2fe2STim Corringham #include "llvm/ADT/Statistic.h" 224c4d2fe2STim Corringham #include "llvm/CodeGen/MachineFunctionPass.h" 234c4d2fe2STim Corringham #include "llvm/CodeGen/MachineInstrBuilder.h" 244c4d2fe2STim Corringham #include "llvm/CodeGen/MachineRegisterInfo.h" 254c4d2fe2STim Corringham #include "llvm/IR/Constants.h" 264c4d2fe2STim Corringham #include "llvm/IR/Function.h" 274c4d2fe2STim Corringham #include "llvm/IR/LLVMContext.h" 284c4d2fe2STim Corringham #include "llvm/Support/Debug.h" 294c4d2fe2STim Corringham #include "llvm/Support/raw_ostream.h" 304c4d2fe2STim Corringham #include "llvm/Target/TargetMachine.h" 314c4d2fe2STim Corringham #include <queue> 324c4d2fe2STim Corringham 334c4d2fe2STim Corringham #define DEBUG_TYPE "si-mode-register" 344c4d2fe2STim Corringham 354c4d2fe2STim Corringham STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted."); 364c4d2fe2STim Corringham 374c4d2fe2STim Corringham using namespace llvm; 384c4d2fe2STim Corringham 394c4d2fe2STim Corringham struct Status { 404c4d2fe2STim Corringham // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a 414c4d2fe2STim Corringham // known value 424c4d2fe2STim Corringham unsigned Mask; 434c4d2fe2STim Corringham unsigned Mode; 444c4d2fe2STim Corringham 454c4d2fe2STim Corringham Status() : Mask(0), Mode(0){}; 464c4d2fe2STim Corringham 47be9ade93SSimon Pilgrim Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) { 484c4d2fe2STim Corringham Mode &= Mask; 494c4d2fe2STim Corringham }; 504c4d2fe2STim Corringham 514c4d2fe2STim Corringham // merge two status values such that only values that don't conflict are 524c4d2fe2STim Corringham // preserved 534c4d2fe2STim Corringham Status merge(const Status &S) const { 544c4d2fe2STim Corringham return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask))); 554c4d2fe2STim Corringham } 564c4d2fe2STim Corringham 574c4d2fe2STim Corringham // merge an unknown value by using the unknown value's mask to remove bits 584c4d2fe2STim Corringham // from the result 594c4d2fe2STim Corringham Status mergeUnknown(unsigned newMask) { 604c4d2fe2STim Corringham return Status(Mask & ~newMask, Mode & ~newMask); 614c4d2fe2STim Corringham } 624c4d2fe2STim Corringham 634c4d2fe2STim Corringham // intersect two Status values to produce a mode and mask that is a subset 644c4d2fe2STim Corringham // of both values 654c4d2fe2STim Corringham Status intersect(const Status &S) const { 664c4d2fe2STim Corringham unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode); 674c4d2fe2STim Corringham unsigned NewMode = (Mode & NewMask); 684c4d2fe2STim Corringham return Status(NewMask, NewMode); 694c4d2fe2STim Corringham } 704c4d2fe2STim Corringham 714c4d2fe2STim Corringham // produce the delta required to change the Mode to the required Mode 724c4d2fe2STim Corringham Status delta(const Status &S) const { 734c4d2fe2STim Corringham return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode); 744c4d2fe2STim Corringham } 754c4d2fe2STim Corringham 764c4d2fe2STim Corringham bool operator==(const Status &S) const { 774c4d2fe2STim Corringham return (Mask == S.Mask) && (Mode == S.Mode); 784c4d2fe2STim Corringham } 794c4d2fe2STim Corringham 804c4d2fe2STim Corringham bool operator!=(const Status &S) const { return !(*this == S); } 814c4d2fe2STim Corringham 824c4d2fe2STim Corringham bool isCompatible(Status &S) { 834c4d2fe2STim Corringham return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode); 844c4d2fe2STim Corringham } 854c4d2fe2STim Corringham 864c4d2fe2STim Corringham bool isCombinable(Status &S) { 874c4d2fe2STim Corringham return !(Mask & S.Mask) || isCompatible(S); 884c4d2fe2STim Corringham } 894c4d2fe2STim Corringham }; 904c4d2fe2STim Corringham 914c4d2fe2STim Corringham class BlockData { 924c4d2fe2STim Corringham public: 934c4d2fe2STim Corringham // The Status that represents the mode register settings required by the 944c4d2fe2STim Corringham // FirstInsertionPoint (if any) in this block. Calculated in Phase 1. 954c4d2fe2STim Corringham Status Require; 964c4d2fe2STim Corringham 974c4d2fe2STim Corringham // The Status that represents the net changes to the Mode register made by 984c4d2fe2STim Corringham // this block, Calculated in Phase 1. 994c4d2fe2STim Corringham Status Change; 1004c4d2fe2STim Corringham 1014c4d2fe2STim Corringham // The Status that represents the mode register settings on exit from this 1024c4d2fe2STim Corringham // block. Calculated in Phase 2. 1034c4d2fe2STim Corringham Status Exit; 1044c4d2fe2STim Corringham 1054c4d2fe2STim Corringham // The Status that represents the intersection of exit Mode register settings 1064c4d2fe2STim Corringham // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3. 1074c4d2fe2STim Corringham Status Pred; 1084c4d2fe2STim Corringham 1094c4d2fe2STim Corringham // In Phase 1 we record the first instruction that has a mode requirement, 1104c4d2fe2STim Corringham // which is used in Phase 3 if we need to insert a mode change. 1114c4d2fe2STim Corringham MachineInstr *FirstInsertionPoint; 1124c4d2fe2STim Corringham 1134c4d2fe2STim Corringham BlockData() : FirstInsertionPoint(nullptr) {}; 1144c4d2fe2STim Corringham }; 1154c4d2fe2STim Corringham 1164c4d2fe2STim Corringham namespace { 1174c4d2fe2STim Corringham 1184c4d2fe2STim Corringham class SIModeRegister : public MachineFunctionPass { 1194c4d2fe2STim Corringham public: 1204c4d2fe2STim Corringham static char ID; 1214c4d2fe2STim Corringham 1224c4d2fe2STim Corringham std::vector<std::unique_ptr<BlockData>> BlockInfo; 1234c4d2fe2STim Corringham std::queue<MachineBasicBlock *> Phase2List; 1244c4d2fe2STim Corringham 1254c4d2fe2STim Corringham // The default mode register setting currently only caters for the floating 1264c4d2fe2STim Corringham // point double precision rounding mode. 1274c4d2fe2STim Corringham // We currently assume the default rounding mode is Round to Nearest 1284c4d2fe2STim Corringham // NOTE: this should come from a per function rounding mode setting once such 1294c4d2fe2STim Corringham // a setting exists. 1304c4d2fe2STim Corringham unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST; 1314c4d2fe2STim Corringham Status DefaultStatus = 1324c4d2fe2STim Corringham Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); 1334c4d2fe2STim Corringham 1344c4d2fe2STim Corringham public: 1354c4d2fe2STim Corringham SIModeRegister() : MachineFunctionPass(ID) {} 1364c4d2fe2STim Corringham 1374c4d2fe2STim Corringham bool runOnMachineFunction(MachineFunction &MF) override; 1384c4d2fe2STim Corringham 1394c4d2fe2STim Corringham void getAnalysisUsage(AnalysisUsage &AU) const override { 1404c4d2fe2STim Corringham AU.setPreservesCFG(); 1414c4d2fe2STim Corringham MachineFunctionPass::getAnalysisUsage(AU); 1424c4d2fe2STim Corringham } 1434c4d2fe2STim Corringham 1444c4d2fe2STim Corringham void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1454c4d2fe2STim Corringham 1464c4d2fe2STim Corringham void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1474c4d2fe2STim Corringham 1484c4d2fe2STim Corringham void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII); 1494c4d2fe2STim Corringham 1504c4d2fe2STim Corringham Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII); 1514c4d2fe2STim Corringham 1524c4d2fe2STim Corringham void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I, 1534c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode); 1544c4d2fe2STim Corringham }; 1554c4d2fe2STim Corringham } // End anonymous namespace. 1564c4d2fe2STim Corringham 1574c4d2fe2STim Corringham INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE, 1584c4d2fe2STim Corringham "Insert required mode register values", false, false) 1594c4d2fe2STim Corringham 1604c4d2fe2STim Corringham char SIModeRegister::ID = 0; 1614c4d2fe2STim Corringham 1624c4d2fe2STim Corringham char &llvm::SIModeRegisterID = SIModeRegister::ID; 1634c4d2fe2STim Corringham 1644c4d2fe2STim Corringham FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); } 1654c4d2fe2STim Corringham 1664c4d2fe2STim Corringham // Determine the Mode register setting required for this instruction. 1674c4d2fe2STim Corringham // Instructions which don't use the Mode register return a null Status. 1684c4d2fe2STim Corringham // Note this currently only deals with instructions that use the floating point 1694c4d2fe2STim Corringham // double precision setting. 1704c4d2fe2STim Corringham Status SIModeRegister::getInstructionMode(MachineInstr &MI, 1714c4d2fe2STim Corringham const SIInstrInfo *TII) { 1724c4d2fe2STim Corringham if (TII->usesFPDPRounding(MI)) { 1734c4d2fe2STim Corringham switch (MI.getOpcode()) { 1744c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LL_F16: 1754c4d2fe2STim Corringham case AMDGPU::V_INTERP_P1LV_F16: 1764c4d2fe2STim Corringham case AMDGPU::V_INTERP_P2_F16: 1774c4d2fe2STim Corringham // f16 interpolation instructions need double precision round to zero 1784c4d2fe2STim Corringham return Status(FP_ROUND_MODE_DP(3), 1794c4d2fe2STim Corringham FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)); 1804c4d2fe2STim Corringham default: 1814c4d2fe2STim Corringham return DefaultStatus; 1824c4d2fe2STim Corringham } 1834c4d2fe2STim Corringham } 1844c4d2fe2STim Corringham return Status(); 1854c4d2fe2STim Corringham } 1864c4d2fe2STim Corringham 1874c4d2fe2STim Corringham // Insert a setreg instruction to update the Mode register. 1884c4d2fe2STim Corringham // It is possible (though unlikely) for an instruction to require a change to 1894c4d2fe2STim Corringham // the value of disjoint parts of the Mode register when we don't know the 1904c4d2fe2STim Corringham // value of the intervening bits. In that case we need to use more than one 1914c4d2fe2STim Corringham // setreg instruction. 1924c4d2fe2STim Corringham void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI, 1934c4d2fe2STim Corringham const SIInstrInfo *TII, Status InstrMode) { 1944c4d2fe2STim Corringham while (InstrMode.Mask) { 1954c4d2fe2STim Corringham unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask); 1964c4d2fe2STim Corringham unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset); 1974c4d2fe2STim Corringham unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1); 1984c4d2fe2STim Corringham BuildMI(MBB, MI, 0, TII->get(AMDGPU::S_SETREG_IMM32_B32)) 1994c4d2fe2STim Corringham .addImm(Value) 2004c4d2fe2STim Corringham .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) | 2014c4d2fe2STim Corringham (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | 2024c4d2fe2STim Corringham (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_)); 2034c4d2fe2STim Corringham ++NumSetregInserted; 2042faadb15STim Corringham InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); 2054c4d2fe2STim Corringham } 2064c4d2fe2STim Corringham } 2074c4d2fe2STim Corringham 2084c4d2fe2STim Corringham // In Phase 1 we iterate through the instructions of the block and for each 2094c4d2fe2STim Corringham // instruction we get its mode usage. If the instruction uses the Mode register 2104c4d2fe2STim Corringham // we: 2114c4d2fe2STim Corringham // - update the Change status, which tracks the changes to the Mode register 2124c4d2fe2STim Corringham // made by this block 2134c4d2fe2STim Corringham // - if this instruction's requirements are compatible with the current setting 2144c4d2fe2STim Corringham // of the Mode register we merge the modes 2154c4d2fe2STim Corringham // - if it isn't compatible and an InsertionPoint isn't set, then we set the 2164c4d2fe2STim Corringham // InsertionPoint to the current instruction, and we remember the current 2174c4d2fe2STim Corringham // mode 2184c4d2fe2STim Corringham // - if it isn't compatible and InsertionPoint is set we insert a seteg before 2194c4d2fe2STim Corringham // that instruction (unless this instruction forms part of the block's 2204c4d2fe2STim Corringham // entry requirements in which case the insertion is deferred until Phase 3 2214c4d2fe2STim Corringham // when predecessor exit values are known), and move the insertion point to 2224c4d2fe2STim Corringham // this instruction 2234c4d2fe2STim Corringham // - if this is a setreg instruction we treat it as an incompatible instruction. 2244c4d2fe2STim Corringham // This is sub-optimal but avoids some nasty corner cases, and is expected to 2254c4d2fe2STim Corringham // occur very rarely. 2264c4d2fe2STim Corringham // - on exit we have set the Require, Change, and initial Exit modes. 2274c4d2fe2STim Corringham void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB, 2284c4d2fe2STim Corringham const SIInstrInfo *TII) { 229*0eaee545SJonas Devlieghere auto NewInfo = std::make_unique<BlockData>(); 2304c4d2fe2STim Corringham MachineInstr *InsertionPoint = nullptr; 2314c4d2fe2STim Corringham // RequirePending is used to indicate whether we are collecting the initial 2324c4d2fe2STim Corringham // requirements for the block, and need to defer the first InsertionPoint to 2334c4d2fe2STim Corringham // Phase 3. It is set to false once we have set FirstInsertionPoint, or when 2344c4d2fe2STim Corringham // we discover an explict setreg that means this block doesn't have any 2354c4d2fe2STim Corringham // initial requirements. 2364c4d2fe2STim Corringham bool RequirePending = true; 2374c4d2fe2STim Corringham Status IPChange; 2384c4d2fe2STim Corringham for (MachineInstr &MI : MBB) { 2394c4d2fe2STim Corringham Status InstrMode = getInstructionMode(MI, TII); 2404c4d2fe2STim Corringham if ((MI.getOpcode() == AMDGPU::S_SETREG_B32) || 2414c4d2fe2STim Corringham (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32)) { 2424c4d2fe2STim Corringham // We preserve any explicit mode register setreg instruction we encounter, 2434c4d2fe2STim Corringham // as we assume it has been inserted by a higher authority (this is 2444c4d2fe2STim Corringham // likely to be a very rare occurrence). 2454c4d2fe2STim Corringham unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm(); 2464c4d2fe2STim Corringham if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) != 2474c4d2fe2STim Corringham AMDGPU::Hwreg::ID_MODE) 2484c4d2fe2STim Corringham continue; 2494c4d2fe2STim Corringham 2504c4d2fe2STim Corringham unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >> 2514c4d2fe2STim Corringham AMDGPU::Hwreg::WIDTH_M1_SHIFT_) + 2524c4d2fe2STim Corringham 1; 2534c4d2fe2STim Corringham unsigned Offset = 2544c4d2fe2STim Corringham (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_; 2554c4d2fe2STim Corringham unsigned Mask = ((1 << Width) - 1) << Offset; 2564c4d2fe2STim Corringham 2574c4d2fe2STim Corringham // If an InsertionPoint is set we will insert a setreg there. 2584c4d2fe2STim Corringham if (InsertionPoint) { 2594c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 2604c4d2fe2STim Corringham InsertionPoint = nullptr; 2614c4d2fe2STim Corringham } 2624c4d2fe2STim Corringham // If this is an immediate then we know the value being set, but if it is 2634c4d2fe2STim Corringham // not an immediate then we treat the modified bits of the mode register 2644c4d2fe2STim Corringham // as unknown. 2654c4d2fe2STim Corringham if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32) { 2664c4d2fe2STim Corringham unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm(); 2674c4d2fe2STim Corringham unsigned Mode = (Val << Offset) & Mask; 2684c4d2fe2STim Corringham Status Setreg = Status(Mask, Mode); 2694c4d2fe2STim Corringham // If we haven't already set the initial requirements for the block we 2704c4d2fe2STim Corringham // don't need to as the requirements start from this explicit setreg. 2714c4d2fe2STim Corringham RequirePending = false; 2724c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(Setreg); 2734c4d2fe2STim Corringham } else { 2744c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.mergeUnknown(Mask); 2754c4d2fe2STim Corringham } 2764c4d2fe2STim Corringham } else if (!NewInfo->Change.isCompatible(InstrMode)) { 2774c4d2fe2STim Corringham // This instruction uses the Mode register and its requirements aren't 2784c4d2fe2STim Corringham // compatible with the current mode. 2794c4d2fe2STim Corringham if (InsertionPoint) { 2804c4d2fe2STim Corringham // If the required mode change cannot be included in the current 2814c4d2fe2STim Corringham // InsertionPoint changes, we need a setreg and start a new 2824c4d2fe2STim Corringham // InsertionPoint. 2834c4d2fe2STim Corringham if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) { 2844c4d2fe2STim Corringham if (RequirePending) { 2854c4d2fe2STim Corringham // This is the first insertionPoint in the block so we will defer 2864c4d2fe2STim Corringham // the insertion of the setreg to Phase 3 where we know whether or 2874c4d2fe2STim Corringham // not it is actually needed. 2884c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint; 2894c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change; 2904c4d2fe2STim Corringham RequirePending = false; 2914c4d2fe2STim Corringham } else { 2924c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, 2934c4d2fe2STim Corringham IPChange.delta(NewInfo->Change)); 2944c4d2fe2STim Corringham IPChange = NewInfo->Change; 2954c4d2fe2STim Corringham } 2964c4d2fe2STim Corringham // Set the new InsertionPoint 2974c4d2fe2STim Corringham InsertionPoint = &MI; 2984c4d2fe2STim Corringham } 2994c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode); 3004c4d2fe2STim Corringham } else { 3014c4d2fe2STim Corringham // No InsertionPoint is currently set - this is either the first in 3024c4d2fe2STim Corringham // the block or we have previously seen an explicit setreg. 3034c4d2fe2STim Corringham InsertionPoint = &MI; 3044c4d2fe2STim Corringham IPChange = NewInfo->Change; 3054c4d2fe2STim Corringham NewInfo->Change = NewInfo->Change.merge(InstrMode); 3064c4d2fe2STim Corringham } 3074c4d2fe2STim Corringham } 3084c4d2fe2STim Corringham } 3094c4d2fe2STim Corringham if (RequirePending) { 3104c4d2fe2STim Corringham // If we haven't yet set the initial requirements for the block we set them 3114c4d2fe2STim Corringham // now. 3124c4d2fe2STim Corringham NewInfo->FirstInsertionPoint = InsertionPoint; 3134c4d2fe2STim Corringham NewInfo->Require = NewInfo->Change; 3144c4d2fe2STim Corringham } else if (InsertionPoint) { 3154c4d2fe2STim Corringham // We need to insert a setreg at the InsertionPoint 3164c4d2fe2STim Corringham insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change)); 3174c4d2fe2STim Corringham } 3184c4d2fe2STim Corringham NewInfo->Exit = NewInfo->Change; 3194c4d2fe2STim Corringham BlockInfo[MBB.getNumber()] = std::move(NewInfo); 3204c4d2fe2STim Corringham } 3214c4d2fe2STim Corringham 3224c4d2fe2STim Corringham // In Phase 2 we revisit each block and calculate the common Mode register 3234c4d2fe2STim Corringham // value provided by all predecessor blocks. If the Exit value for the block 3244c4d2fe2STim Corringham // is changed, then we add the successor blocks to the worklist so that the 3254c4d2fe2STim Corringham // exit value is propagated. 3264c4d2fe2STim Corringham void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, 3274c4d2fe2STim Corringham const SIInstrInfo *TII) { 3284c4d2fe2STim Corringham // BlockData *BI = BlockInfo[MBB.getNumber()]; 3294c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber(); 3304c4d2fe2STim Corringham if (MBB.pred_empty()) { 3314c4d2fe2STim Corringham // There are no predecessors, so use the default starting status. 3324c4d2fe2STim Corringham BlockInfo[ThisBlock]->Pred = DefaultStatus; 3334c4d2fe2STim Corringham } else { 3344c4d2fe2STim Corringham // Build a status that is common to all the predecessors by intersecting 3354c4d2fe2STim Corringham // all the predecessor exit status values. 3364c4d2fe2STim Corringham MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); 3374c4d2fe2STim Corringham MachineBasicBlock &PB = *(*P); 3384c4d2fe2STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit; 3394c4d2fe2STim Corringham 3404c4d2fe2STim Corringham for (P = std::next(P); P != E; P = std::next(P)) { 3414c4d2fe2STim Corringham MachineBasicBlock *Pred = *P; 3424c4d2fe2STim Corringham BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit); 3434c4d2fe2STim Corringham } 3444c4d2fe2STim Corringham } 3454c4d2fe2STim Corringham Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); 3464c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->Exit != TmpStatus) { 3474c4d2fe2STim Corringham BlockInfo[ThisBlock]->Exit = TmpStatus; 3484c4d2fe2STim Corringham // Add the successors to the work list so we can propagate the changed exit 3494c4d2fe2STim Corringham // status. 3504c4d2fe2STim Corringham for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), 3514c4d2fe2STim Corringham E = MBB.succ_end(); 3524c4d2fe2STim Corringham S != E; S = std::next(S)) { 3534c4d2fe2STim Corringham MachineBasicBlock &B = *(*S); 3544c4d2fe2STim Corringham Phase2List.push(&B); 3554c4d2fe2STim Corringham } 3564c4d2fe2STim Corringham } 3574c4d2fe2STim Corringham } 3584c4d2fe2STim Corringham 3594c4d2fe2STim Corringham // In Phase 3 we revisit each block and if it has an insertion point defined we 3604c4d2fe2STim Corringham // check whether the predecessor mode meets the block's entry requirements. If 3614c4d2fe2STim Corringham // not we insert an appropriate setreg instruction to modify the Mode register. 3624c4d2fe2STim Corringham void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, 3634c4d2fe2STim Corringham const SIInstrInfo *TII) { 3644c4d2fe2STim Corringham // BlockData *BI = BlockInfo[MBB.getNumber()]; 3654c4d2fe2STim Corringham unsigned ThisBlock = MBB.getNumber(); 3664c4d2fe2STim Corringham if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { 3674c4d2fe2STim Corringham Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); 3684c4d2fe2STim Corringham if (BlockInfo[ThisBlock]->FirstInsertionPoint) 3694c4d2fe2STim Corringham insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); 3704c4d2fe2STim Corringham else 3714c4d2fe2STim Corringham insertSetreg(MBB, &MBB.instr_front(), TII, Delta); 3724c4d2fe2STim Corringham } 3734c4d2fe2STim Corringham } 3744c4d2fe2STim Corringham 3754c4d2fe2STim Corringham bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) { 3764c4d2fe2STim Corringham BlockInfo.resize(MF.getNumBlockIDs()); 3774c4d2fe2STim Corringham const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 3784c4d2fe2STim Corringham const SIInstrInfo *TII = ST.getInstrInfo(); 3794c4d2fe2STim Corringham 3804c4d2fe2STim Corringham // Processing is performed in a number of phases 3814c4d2fe2STim Corringham 3824c4d2fe2STim Corringham // Phase 1 - determine the initial mode required by each block, and add setreg 3834c4d2fe2STim Corringham // instructions for intra block requirements. 3844c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 3854c4d2fe2STim Corringham processBlockPhase1(BB, TII); 3864c4d2fe2STim Corringham 3874c4d2fe2STim Corringham // Phase 2 - determine the exit mode from each block. We add all blocks to the 3884c4d2fe2STim Corringham // list here, but will also add any that need to be revisited during Phase 2 3894c4d2fe2STim Corringham // processing. 3904c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 3914c4d2fe2STim Corringham Phase2List.push(&BB); 3924c4d2fe2STim Corringham while (!Phase2List.empty()) { 3934c4d2fe2STim Corringham processBlockPhase2(*Phase2List.front(), TII); 3944c4d2fe2STim Corringham Phase2List.pop(); 3954c4d2fe2STim Corringham } 3964c4d2fe2STim Corringham 3974c4d2fe2STim Corringham // Phase 3 - add an initial setreg to each block where the required entry mode 3984c4d2fe2STim Corringham // is not satisfied by the exit mode of all its predecessors. 3994c4d2fe2STim Corringham for (MachineBasicBlock &BB : MF) 4004c4d2fe2STim Corringham processBlockPhase3(BB, TII); 4014c4d2fe2STim Corringham 4024c4d2fe2STim Corringham BlockInfo.clear(); 4034c4d2fe2STim Corringham 4044c4d2fe2STim Corringham return NumSetregInserted > 0; 4054c4d2fe2STim Corringham } 406