1 //===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the implementation of the classes providing information 11 // about existing X86 FMA3 opcodes, classifying and grouping them. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86InstrFMA3Info.h" 16 #include "X86InstrInfo.h" 17 #include "llvm/Support/ManagedStatic.h" 18 #include "llvm/Support/Threading.h" 19 using namespace llvm; 20 21 /// This flag is used in the method llvm::call_once() used below to make the 22 /// initialization of the map 'OpcodeToGroup' thread safe. 23 LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag); 24 25 static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj; 26 X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() { 27 return &*X86InstrFMA3InfoObj; 28 } 29 30 void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes, 31 const uint16_t *MemOpcodes, unsigned Attr) { 32 // Create a new instance of this class that would hold a group of FMA opcodes. 33 X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr); 34 35 // Add the references from indvidual opcodes to the group holding them. 36 assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] && 37 !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] && 38 !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) && 39 "Duplication or rewrite of elements in OpcodeToGroup."); 40 OpcodeToGroup[RegOpcodes[0]] = G; 41 OpcodeToGroup[RegOpcodes[1]] = G; 42 OpcodeToGroup[RegOpcodes[2]] = G; 43 OpcodeToGroup[MemOpcodes[0]] = G; 44 OpcodeToGroup[MemOpcodes[1]] = G; 45 OpcodeToGroup[MemOpcodes[2]] = G; 46 } 47 48 void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) { 49 // Create a new instance of this class that would hold a group of FMA opcodes. 50 X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr); 51 52 // Add the references from indvidual opcodes to the group holding them. 53 assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] && 54 !OpcodeToGroup[RegOpcodes[2]]) && 55 "Duplication or rewrite of elements in OpcodeToGroup."); 56 OpcodeToGroup[RegOpcodes[0]] = G; 57 OpcodeToGroup[RegOpcodes[1]] = G; 58 OpcodeToGroup[RegOpcodes[2]] = G; 59 } 60 61 void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) { 62 // Create a new instance of this class that would hold a group of FMA opcodes. 63 X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr); 64 65 // Add the references from indvidual opcodes to the group holding them. 66 assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] && 67 !OpcodeToGroup[MemOpcodes[2]]) && 68 "Duplication or rewrite of elements in OpcodeToGroup."); 69 OpcodeToGroup[MemOpcodes[0]] = G; 70 OpcodeToGroup[MemOpcodes[1]] = G; 71 OpcodeToGroup[MemOpcodes[2]] = G; 72 } 73 74 #define FMA3RM(R132, R213, R231, M132, M213, M231) \ 75 static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ 76 static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \ 77 initRMGroup(Reg##R132, Mem##R132); 78 79 #define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \ 80 static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ 81 static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \ 82 initRMGroup(Reg##R132, Mem##R132, (Attrs)); 83 84 #define FMA3R(R132, R213, R231) \ 85 static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ 86 initRGroup(Reg##R132); 87 88 #define FMA3RA(R132, R213, R231, Attrs) \ 89 static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ 90 initRGroup(Reg##R132, (Attrs)); 91 92 #define FMA3M(M132, M213, M231) \ 93 static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \ 94 initMGroup(Mem##M132); 95 96 #define FMA3MA(M132, M213, M231, Attrs) \ 97 static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \ 98 initMGroup(Mem##M132, (Attrs)); 99 100 #define FMA3_AVX2_VECTOR_GROUP(Name) \ 101 FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \ 102 Name##132PSm, Name##213PSm, Name##231PSm); \ 103 FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \ 104 Name##132PDm, Name##213PDm, Name##231PDm); \ 105 FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \ 106 Name##132PSYm, Name##213PSYm, Name##231PSYm); \ 107 FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \ 108 Name##132PDYm, Name##213PDYm, Name##231PDYm); 109 110 #define FMA3_AVX2_SCALAR_GROUP(Name) \ 111 FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \ 112 Name##132SSm, Name##213SSm, Name##231SSm); \ 113 FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \ 114 Name##132SDm, Name##213SDm, Name##231SDm); \ 115 FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \ 116 Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \ 117 X86InstrFMA3Group::X86FMA3Intrinsic); \ 118 FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \ 119 Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \ 120 X86InstrFMA3Group::X86FMA3Intrinsic); 121 122 #define FMA3_AVX2_FULL_GROUP(Name) \ 123 FMA3_AVX2_VECTOR_GROUP(Name); \ 124 FMA3_AVX2_SCALAR_GROUP(Name); 125 126 #define FMA3_AVX512_VECTOR_GROUP(Name) \ 127 FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \ 128 Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \ 129 FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \ 130 Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \ 131 FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \ 132 Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \ 133 FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \ 134 Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \ 135 FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \ 136 Name##132PSZm, Name##213PSZm, Name##231PSZm); \ 137 FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \ 138 Name##132PDZm, Name##213PDZm, Name##231PDZm); \ 139 FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \ 140 Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \ 141 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 142 FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \ 143 Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \ 144 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 145 FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \ 146 Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \ 147 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 148 FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \ 149 Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \ 150 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 151 FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \ 152 Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \ 153 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 154 FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \ 155 Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \ 156 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 157 FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \ 158 Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \ 159 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 160 FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \ 161 Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \ 162 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 163 FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \ 164 Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \ 165 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 166 FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \ 167 Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \ 168 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 169 FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \ 170 Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \ 171 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 172 FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \ 173 Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \ 174 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 175 FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \ 176 FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \ 177 FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \ 178 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 179 FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \ 180 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 181 FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \ 182 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 183 FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \ 184 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 185 FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \ 186 FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \ 187 FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \ 188 FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \ 189 FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \ 190 FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \ 191 FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \ 192 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 193 FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \ 194 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 195 FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \ 196 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 197 FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \ 198 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 199 FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \ 200 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 201 FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \ 202 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 203 FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \ 204 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 205 FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \ 206 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 207 FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \ 208 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 209 FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \ 210 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 211 FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \ 212 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 213 FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \ 214 X86InstrFMA3Group::X86FMA3KZeroMasked); 215 216 #define FMA3_AVX512_SCALAR_GROUP(Name) \ 217 FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \ 218 Name##132SSZm, Name##213SSZm, Name##231SSZm); \ 219 FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \ 220 Name##132SDZm, Name##213SDZm, Name##231SDZm); \ 221 FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \ 222 Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \ 223 X86InstrFMA3Group::X86FMA3Intrinsic); \ 224 FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \ 225 Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \ 226 X86InstrFMA3Group::X86FMA3Intrinsic); \ 227 FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \ 228 Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \ 229 X86InstrFMA3Group::X86FMA3Intrinsic | \ 230 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 231 FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \ 232 Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \ 233 X86InstrFMA3Group::X86FMA3Intrinsic | \ 234 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 235 FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \ 236 Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \ 237 X86InstrFMA3Group::X86FMA3Intrinsic | \ 238 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 239 FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \ 240 Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \ 241 X86InstrFMA3Group::X86FMA3Intrinsic | \ 242 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 243 FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \ 244 X86InstrFMA3Group::X86FMA3Intrinsic); \ 245 FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \ 246 X86InstrFMA3Group::X86FMA3Intrinsic); \ 247 FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \ 248 X86InstrFMA3Group::X86FMA3Intrinsic | \ 249 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 250 FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \ 251 X86InstrFMA3Group::X86FMA3Intrinsic | \ 252 X86InstrFMA3Group::X86FMA3KMergeMasked); \ 253 FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \ 254 X86InstrFMA3Group::X86FMA3Intrinsic | \ 255 X86InstrFMA3Group::X86FMA3KZeroMasked); \ 256 FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \ 257 X86InstrFMA3Group::X86FMA3Intrinsic | \ 258 X86InstrFMA3Group::X86FMA3KZeroMasked); 259 260 #define FMA3_AVX512_FULL_GROUP(Name) \ 261 FMA3_AVX512_VECTOR_GROUP(Name); \ 262 FMA3_AVX512_SCALAR_GROUP(Name); 263 264 void X86InstrFMA3Info::initGroupsOnceImpl() { 265 FMA3_AVX2_FULL_GROUP(VFMADD); 266 FMA3_AVX2_FULL_GROUP(VFMSUB); 267 FMA3_AVX2_FULL_GROUP(VFNMADD); 268 FMA3_AVX2_FULL_GROUP(VFNMSUB); 269 270 FMA3_AVX2_VECTOR_GROUP(VFMADDSUB); 271 FMA3_AVX2_VECTOR_GROUP(VFMSUBADD); 272 273 FMA3_AVX512_FULL_GROUP(VFMADD); 274 FMA3_AVX512_FULL_GROUP(VFMSUB); 275 FMA3_AVX512_FULL_GROUP(VFNMADD); 276 FMA3_AVX512_FULL_GROUP(VFNMSUB); 277 278 FMA3_AVX512_VECTOR_GROUP(VFMADDSUB); 279 FMA3_AVX512_VECTOR_GROUP(VFMSUBADD); 280 } 281 282 void X86InstrFMA3Info::initGroupsOnce() { 283 llvm::call_once(InitGroupsOnceFlag, 284 []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); }); 285 } 286