1 //=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the implementation of custom routines for the X86 11 // Calling Convention that aren't done by tablegen. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86CallingConv.h" 16 #include "X86Subtarget.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/IR/CallingConv.h" 20 21 using namespace llvm; 22 23 /// When regcall calling convention compiled to 32 bit arch, special treatment 24 /// is required for 64 bit masks. 25 /// The value should be assigned to two GPRs. 26 /// \return true if registers were allocated and false otherwise. 27 static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, 28 MVT &LocVT, 29 CCValAssign::LocInfo &LocInfo, 30 ISD::ArgFlagsTy &ArgFlags, 31 CCState &State) { 32 // List of GPR registers that are available to store values in regcall 33 // calling convention. 34 static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI, 35 X86::ESI}; 36 37 // The vector will save all the available registers for allocation. 38 SmallVector<unsigned, 5> AvailableRegs; 39 40 // searching for the available registers. 41 for (auto Reg : RegList) { 42 if (!State.isAllocated(Reg)) 43 AvailableRegs.push_back(Reg); 44 } 45 46 const size_t RequiredGprsUponSplit = 2; 47 if (AvailableRegs.size() < RequiredGprsUponSplit) 48 return false; // Not enough free registers - continue the search. 49 50 // Allocating the available registers. 51 for (unsigned I = 0; I < RequiredGprsUponSplit; I++) { 52 53 // Marking the register as located. 54 unsigned Reg = State.AllocateReg(AvailableRegs[I]); 55 56 // Since we previously made sure that 2 registers are available 57 // we expect that a real register number will be returned. 58 assert(Reg && "Expecting a register will be available"); 59 60 // Assign the value to the allocated register 61 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 62 } 63 64 // Successful in allocating regsiters - stop scanning next rules. 65 return true; 66 } 67 68 static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) { 69 if (ValVT.is512BitVector()) { 70 static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2, 71 X86::ZMM3, X86::ZMM4, X86::ZMM5}; 72 return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM)); 73 } 74 75 if (ValVT.is256BitVector()) { 76 static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2, 77 X86::YMM3, X86::YMM4, X86::YMM5}; 78 return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM)); 79 } 80 81 static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2, 82 X86::XMM3, X86::XMM4, X86::XMM5}; 83 return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM)); 84 } 85 86 static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() { 87 static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9}; 88 return makeArrayRef(std::begin(RegListGPR), std::end(RegListGPR)); 89 } 90 91 static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT, 92 MVT &LocVT, 93 CCValAssign::LocInfo &LocInfo, 94 ISD::ArgFlagsTy &ArgFlags, 95 CCState &State) { 96 97 ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT); 98 bool Is64bit = static_cast<const X86Subtarget &>( 99 State.getMachineFunction().getSubtarget()) 100 .is64Bit(); 101 102 for (auto Reg : RegList) { 103 // If the register is not marked as allocated - assign to it. 104 if (!State.isAllocated(Reg)) { 105 unsigned AssigedReg = State.AllocateReg(Reg); 106 assert(AssigedReg == Reg && "Expecting a valid register allocation"); 107 State.addLoc( 108 CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo)); 109 return true; 110 } 111 // If the register is marked as shadow allocated - assign to it. 112 if (Is64bit && State.IsShadowAllocatedReg(Reg)) { 113 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 114 return true; 115 } 116 } 117 118 llvm_unreachable("Clang should ensure that hva marked vectors will have " 119 "an available register."); 120 return false; 121 } 122 123 /// Vectorcall calling convention has special handling for vector types or 124 /// HVA for 64 bit arch. 125 /// For HVAs shadow registers might be allocated on the first pass 126 /// and actual XMM registers are allocated on the second pass. 127 /// For vector types, actual XMM registers are allocated on the first pass. 128 /// \return true if registers were allocated and false otherwise. 129 static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 130 CCValAssign::LocInfo &LocInfo, 131 ISD::ArgFlagsTy &ArgFlags, CCState &State) { 132 // On the second pass, go through the HVAs only. 133 if (ArgFlags.isSecArgPass()) { 134 if (ArgFlags.isHva()) 135 return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, 136 ArgFlags, State); 137 return true; 138 } 139 140 // Process only vector types as defined by vectorcall spec: 141 // "A vector type is either a floating-point type, for example, 142 // a float or double, or an SIMD vector type, for example, __m128 or __m256". 143 if (!(ValVT.isFloatingPoint() || 144 (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { 145 // If R9 was already assigned it means that we are after the fourth element 146 // and because this is not an HVA / Vector type, we need to allocate 147 // shadow XMM register. 148 if (State.isAllocated(X86::R9)) { 149 // Assign shadow XMM register. 150 (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT)); 151 } 152 153 return false; 154 } 155 156 if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) { 157 // Assign shadow GPR register. 158 (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs()); 159 160 // Assign XMM register - (shadow for HVA and non-shadow for non HVA). 161 if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { 162 // In Vectorcall Calling convention, additional shadow stack can be 163 // created on top of the basic 32 bytes of win64. 164 // It can happen if the fifth or sixth argument is vector type or HVA. 165 // At that case for each argument a shadow stack of 8 bytes is allocated. 166 if (Reg == X86::XMM4 || Reg == X86::XMM5) 167 State.AllocateStack(8, 8); 168 169 if (!ArgFlags.isHva()) { 170 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 171 return true; // Allocated a register - Stop the search. 172 } 173 } 174 } 175 176 // If this is an HVA - Stop the search, 177 // otherwise continue the search. 178 return ArgFlags.isHva(); 179 } 180 181 /// Vectorcall calling convention has special handling for vector types or 182 /// HVA for 32 bit arch. 183 /// For HVAs actual XMM registers are allocated on the second pass. 184 /// For vector types, actual XMM registers are allocated on the first pass. 185 /// \return true if registers were allocated and false otherwise. 186 static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 187 CCValAssign::LocInfo &LocInfo, 188 ISD::ArgFlagsTy &ArgFlags, CCState &State) { 189 // On the second pass, go through the HVAs only. 190 if (ArgFlags.isSecArgPass()) { 191 if (ArgFlags.isHva()) 192 return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo, 193 ArgFlags, State); 194 return true; 195 } 196 197 // Process only vector types as defined by vectorcall spec: 198 // "A vector type is either a floating point type, for example, 199 // a float or double, or an SIMD vector type, for example, __m128 or __m256". 200 if (!(ValVT.isFloatingPoint() || 201 (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) { 202 return false; 203 } 204 205 if (ArgFlags.isHva()) 206 return true; // If this is an HVA - Stop the search. 207 208 // Assign XMM register. 209 if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) { 210 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 211 return true; 212 } 213 214 // In case we did not find an available XMM register for a vector - 215 // pass it indirectly. 216 // It is similar to CCPassIndirect, with the addition of inreg. 217 if (!ValVT.isFloatingPoint()) { 218 LocVT = MVT::i32; 219 LocInfo = CCValAssign::Indirect; 220 ArgFlags.setInReg(); 221 } 222 223 return false; // No register was assigned - Continue the search. 224 } 225 226 static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, 227 CCValAssign::LocInfo &, ISD::ArgFlagsTy &, 228 CCState &) { 229 llvm_unreachable("The AnyReg calling convention is only supported by the " 230 "stackmap and patchpoint intrinsics."); 231 // gracefully fallback to X86 C calling convention on Release builds. 232 return false; 233 } 234 235 static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 236 CCValAssign::LocInfo &LocInfo, 237 ISD::ArgFlagsTy &ArgFlags, CCState &State) { 238 // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure 239 // not to split i64 and double between a register and stack 240 static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; 241 static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]); 242 243 SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); 244 245 // If this is the first part of an double/i64/i128, or if we're already 246 // in the middle of a split, add to the pending list. If this is not 247 // the end of the split, return, otherwise go on to process the pending 248 // list 249 if (ArgFlags.isSplit() || !PendingMembers.empty()) { 250 PendingMembers.push_back( 251 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); 252 if (!ArgFlags.isSplitEnd()) 253 return true; 254 } 255 256 // If there are no pending members, we are not in the middle of a split, 257 // so do the usual inreg stuff. 258 if (PendingMembers.empty()) { 259 if (unsigned Reg = State.AllocateReg(RegList)) { 260 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); 261 return true; 262 } 263 return false; 264 } 265 266 assert(ArgFlags.isSplitEnd()); 267 268 // We now have the entire original argument in PendingMembers, so decide 269 // whether to use registers or the stack. 270 // Per the MCU ABI: 271 // a) To use registers, we need to have enough of them free to contain 272 // the entire argument. 273 // b) We never want to use more than 2 registers for a single argument. 274 275 unsigned FirstFree = State.getFirstUnallocated(RegList); 276 bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); 277 278 for (auto &It : PendingMembers) { 279 if (UseRegs) 280 It.convertToReg(State.AllocateReg(RegList[FirstFree++])); 281 else 282 It.convertToMem(State.AllocateStack(4, 4)); 283 State.addLoc(It); 284 } 285 286 PendingMembers.clear(); 287 288 return true; 289 } 290 291 // Provides entry points of CC_X86 and RetCC_X86. 292 #include "X86GenCallingConv.inc" 293