//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9 139 Name.startswith("avx512.mask.pxor.") || // Added in 3.9 140 Name.startswith("avx512.mask.and.") || // Added in 3.9 141 Name.startswith("avx512.mask.andn.") || // Added in 3.9 142 Name.startswith("avx512.mask.or.") || // Added in 3.9 143 Name.startswith("avx512.mask.xor.") || // Added in 3.9 144 Name.startswith("avx512.mask.padd.") || // Added in 4.0 145 Name.startswith("avx512.mask.psub.") || // Added in 4.0 146 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 147 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 148 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 149 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 150 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 151 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 152 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 153 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 154 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 155 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 156 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 157 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 158 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 159 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 160 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 161 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 162 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 163 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 164 Name == "avx512.mask.add.pd.128" || // Added in 4.0 165 Name == "avx512.mask.add.pd.256" || // Added in 4.0 166 Name == "avx512.mask.add.ps.128" || // Added in 4.0 167 Name == "avx512.mask.add.ps.256" || // Added in 4.0 168 Name == "avx512.mask.div.pd.128" || // Added in 4.0 169 Name == "avx512.mask.div.pd.256" || // Added in 4.0 170 Name == "avx512.mask.div.ps.128" || // Added in 4.0 171 Name == "avx512.mask.div.ps.256" || // Added in 4.0 172 Name == "avx512.mask.mul.pd.128" || // Added in 4.0 173 Name == "avx512.mask.mul.pd.256" || // Added in 4.0 174 Name == "avx512.mask.mul.ps.128" || // Added in 4.0 175 Name == "avx512.mask.mul.ps.256" || // Added in 4.0 176 Name == "avx512.mask.sub.pd.128" || // Added in 4.0 177 Name == "avx512.mask.sub.pd.256" || // Added in 4.0 178 Name == "avx512.mask.sub.ps.128" || // Added in 4.0 179 Name == "avx512.mask.sub.ps.256" || // Added in 4.0 180 Name == "avx512.mask.max.pd.128" || // Added in 5.0 181 Name == "avx512.mask.max.pd.256" || // Added in 5.0 182 Name == "avx512.mask.max.ps.128" || // Added in 5.0 183 Name == "avx512.mask.max.ps.256" || // Added in 5.0 184 Name == "avx512.mask.min.pd.128" || // Added in 5.0 185 Name == "avx512.mask.min.pd.256" || // Added in 5.0 186 Name == "avx512.mask.min.ps.128" || // Added in 5.0 187 Name == "avx512.mask.min.ps.256" || // Added in 5.0 188 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 189 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 190 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 191 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 192 Name.startswith("avx512.mask.psra.d") || // Added in 4.0 193 Name.startswith("avx512.mask.psra.q") || // Added in 4.0 194 Name.startswith("avx512.mask.psra.w") || // Added in 4.0 195 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0 196 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0 197 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0 198 
Name.startswith("avx512.mask.pslli") || // Added in 4.0 199 Name.startswith("avx512.mask.psrai") || // Added in 4.0 200 Name.startswith("avx512.mask.psrli") || // Added in 4.0 201 Name.startswith("avx512.mask.psllv") || // Added in 4.0 202 Name.startswith("avx512.mask.psrav") || // Added in 4.0 203 Name.startswith("avx512.mask.psrlv") || // Added in 4.0 204 Name.startswith("sse41.pmovsx") || // Added in 3.8 205 Name.startswith("sse41.pmovzx") || // Added in 3.9 206 Name.startswith("avx2.pmovsx") || // Added in 3.9 207 Name.startswith("avx2.pmovzx") || // Added in 3.9 208 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 209 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 210 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 211 Name == "sse2.cvtdq2pd" || // Added in 3.9 212 Name == "sse2.cvtps2pd" || // Added in 3.9 213 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 214 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 215 Name.startswith("avx.vinsertf128.") || // Added in 3.7 216 Name == "avx2.vinserti128" || // Added in 3.7 217 Name.startswith("avx512.mask.insert") || // Added in 4.0 218 Name.startswith("avx.vextractf128.") || // Added in 3.7 219 Name == "avx2.vextracti128" || // Added in 3.7 220 Name.startswith("avx512.mask.vextract") || // Added in 4.0 221 Name.startswith("sse4a.movnt.") || // Added in 3.9 222 Name.startswith("avx.movnt.") || // Added in 3.2 223 Name.startswith("avx512.storent.") || // Added in 3.9 224 Name == "sse41.movntdqa" || // Added in 5.0 225 Name == "avx2.movntdqa" || // Added in 5.0 226 Name == "avx512.movntdqa" || // Added in 5.0 227 Name == "sse2.storel.dq" || // Added in 3.9 228 Name.startswith("sse.storeu.") || // Added in 3.9 229 Name.startswith("sse2.storeu.") || // Added in 3.9 230 Name.startswith("avx.storeu.") || // Added in 3.9 231 Name.startswith("avx512.mask.storeu.") || // Added in 3.9 232 Name.startswith("avx512.mask.store.p") || // Added in 3.9 233 Name.startswith("avx512.mask.store.b.") || // Added in 3.9 234 Name.startswith("avx512.mask.store.w.") || // Added in 3.9 235 Name.startswith("avx512.mask.store.d.") || // Added in 3.9 236 Name.startswith("avx512.mask.store.q.") || // Added in 3.9 237 Name.startswith("avx512.mask.loadu.") || // Added in 3.9 238 Name.startswith("avx512.mask.load.") || // Added in 3.9 239 Name == "sse42.crc32.64.8" || // Added in 3.4 240 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 241 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 242 Name.startswith("avx512.mask.valign.") || // Added in 4.0 243 Name.startswith("sse2.psll.dq") || // Added in 3.7 244 Name.startswith("sse2.psrl.dq") || // Added in 3.7 245 Name.startswith("avx2.psll.dq") || // Added in 3.7 246 Name.startswith("avx2.psrl.dq") || // Added in 3.7 247 Name.startswith("avx512.psll.dq") || // Added in 3.9 248 Name.startswith("avx512.psrl.dq") || // Added in 3.9 249 Name == "sse41.pblendw" || // Added in 3.7 250 Name.startswith("sse41.blendp") || // Added in 3.7 251 Name.startswith("avx.blend.p") || // Added in 3.7 252 Name == "avx2.pblendw" || // Added in 3.7 253 Name.startswith("avx2.pblendd.") || // Added in 3.7 254 Name.startswith("avx.vbroadcastf128") || // Added in 4.0 255 Name == "avx2.vbroadcasti128" || // Added in 3.7 256 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0 257 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 258 Name == "xop.vpcmov" || // Added in 3.8 259 Name == "xop.vpcmov.256" || // Added in 5.0 260 Name.startswith("avx512.mask.move.s") || // Added in 4.0 261 
Name.startswith("avx512.cvtmask2") || // Added in 5.0 262 (Name.startswith("xop.vpcom") && // Added in 3.2 263 F->arg_size() == 2) || 264 Name.startswith("avx512.ptestm") || //Added in 6.0 265 Name.startswith("avx512.ptestnm") || //Added in 6.0 266 Name.startswith("sse2.pavg") || // Added in 6.0 267 Name.startswith("avx2.pavg") || // Added in 6.0 268 Name.startswith("avx512.mask.pavg")) // Added in 6.0 269 return true; 270 271 return false; 272 } 273 274 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, 275 Function *&NewFn) { 276 // Only handle intrinsics that start with "x86.". 277 if (!Name.startswith("x86.")) 278 return false; 279 // Remove "x86." prefix. 280 Name = Name.substr(4); 281 282 if (ShouldUpgradeX86Intrinsic(F, Name)) { 283 NewFn = nullptr; 284 return true; 285 } 286 287 // SSE4.1 ptest functions may have an old signature. 288 if (Name.startswith("sse41.ptest")) { // Added in 3.2 289 if (Name.substr(11) == "c") 290 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn); 291 if (Name.substr(11) == "z") 292 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn); 293 if (Name.substr(11) == "nzc") 294 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 295 } 296 // Several blend and other instructions with masks used the wrong number of 297 // bits. 298 if (Name == "sse41.insertps") // Added in 3.6 299 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 300 NewFn); 301 if (Name == "sse41.dppd") // Added in 3.6 302 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 303 NewFn); 304 if (Name == "sse41.dpps") // Added in 3.6 305 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 306 NewFn); 307 if (Name == "sse41.mpsadbw") // Added in 3.6 308 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 309 NewFn); 310 if (Name == "avx.dp.ps.256") // Added in 3.6 311 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 312 NewFn); 313 if (Name == "avx2.mpsadbw") // Added in 3.6 314 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 315 NewFn); 316 317 // frcz.ss/sd may need to have an argument dropped. Added in 3.2 318 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { 319 rename(F); 320 NewFn = Intrinsic::getDeclaration(F->getParent(), 321 Intrinsic::x86_xop_vfrcz_ss); 322 return true; 323 } 324 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) { 325 rename(F); 326 NewFn = Intrinsic::getDeclaration(F->getParent(), 327 Intrinsic::x86_xop_vfrcz_sd); 328 return true; 329 } 330 // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
+ Name + ".p0i8", F->getParent()); 398 return true; 399 } 400 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 401 if (vstRegex.match(Name)) { 402 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 403 Intrinsic::arm_neon_vst2, 404 Intrinsic::arm_neon_vst3, 405 Intrinsic::arm_neon_vst4}; 406 407 static const Intrinsic::ID StoreLaneInts[] = { 408 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 409 Intrinsic::arm_neon_vst4lane 410 }; 411 412 auto fArgs = F->getFunctionType()->params(); 413 Type *Tys[] = {fArgs[0], fArgs[1]}; 414 if (Name.find("lane") == StringRef::npos) 415 NewFn = Intrinsic::getDeclaration(F->getParent(), 416 StoreInts[fArgs.size() - 3], Tys); 417 else 418 NewFn = Intrinsic::getDeclaration(F->getParent(), 419 StoreLaneInts[fArgs.size() - 5], Tys); 420 return true; 421 } 422 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 423 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 424 return true; 425 } 426 break; 427 } 428 429 case 'c': { 430 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 431 rename(F); 432 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 433 F->arg_begin()->getType()); 434 return true; 435 } 436 if (Name.startswith("cttz.") && F->arg_size() == 1) { 437 rename(F); 438 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 439 F->arg_begin()->getType()); 440 return true; 441 } 442 break; 443 } 444 case 'd': { 445 if (Name == "dbg.value" && F->arg_size() == 4) { 446 rename(F); 447 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); 448 return true; 449 } 450 break; 451 } 452 case 'i': 453 case 'l': { 454 bool IsLifetimeStart = Name.startswith("lifetime.start"); 455 if (IsLifetimeStart || Name.startswith("invariant.start")) { 456 Intrinsic::ID ID = IsLifetimeStart ? 457 Intrinsic::lifetime_start : Intrinsic::invariant_start; 458 auto Args = F->getFunctionType()->params(); 459 Type* ObjectPtr[1] = {Args[1]}; 460 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { 461 rename(F); 462 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 463 return true; 464 } 465 } 466 467 bool IsLifetimeEnd = Name.startswith("lifetime.end"); 468 if (IsLifetimeEnd || Name.startswith("invariant.end")) { 469 Intrinsic::ID ID = IsLifetimeEnd ? 470 Intrinsic::lifetime_end : Intrinsic::invariant_end; 471 472 auto Args = F->getFunctionType()->params(); 473 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space.
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
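    // For example, an old two-argument "llvm.objectsize.i64" declaration, or
    // one whose pointer argument uses a non-default address space, gets
    // re-declared under the fully mangled name (something like
    // "llvm.objectsize.i64.p0i8").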
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first operand.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  Value *Op0 = CI.getArgOperand(0);
  llvm::Type *Ty = Op0->getType();
  Value *Zero = llvm::Constant::getNullValue(Ty);
  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
  Value *Neg = Builder.CreateNeg(Op0);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);

  if (CI.getNumArgOperands() == 3)
    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));

  return Res;
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
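// For example, with NumElts == 4 the <4 x i1> result is widened with four
// zero elements to <8 x i1> and then bitcast to i8, matching the narrowest
// mask type used by the original intrinsics.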
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask, unsigned NumElts) {
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
}

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
                                    Intrinsic::ID IID) {
  Function *F = CI.getCalledFunction();
  Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
  Value *Rep = Builder.CreateCall(Intrin,
                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  Value* A = CI.getArgOperand(0);
  Value* B = CI.getArgOperand(1);
  Value* Src = CI.getArgOperand(2);
  Value* Mask = CI.getArgOperand(3);

  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value* Cmp = Builder.CreateIsNotNull(AndNode);
  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}


static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  Value* Op = CI.getArgOperand(0);
  Type* ReturnOp = CI.getType();
  unsigned NumElts = CI.getType()->getVectorNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
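///
/// For example, when NewFn is null a call to llvm.x86.sse2.pcmpeq.d is
/// rewritten below as an 'icmp eq' followed by a sign extension back to the
/// original return type.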
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);
    bool IsNVVM = Name.startswith("nvvm.");
    if (IsNVVM)
      Name = Name.substr(5);

    if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx.movnt.") ||
                  Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("sse.storeu.") ||
                  Name.startswith("sse2.storeu.") ||
                  Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    }

    if (IsX86 && (Name.startswith("avx512.mask.store"))) {
      // "avx512.mask.storeu." or "avx512.mask.store."
or "avx512.mask.store." 1027 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". 1028 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1029 CI->getArgOperand(2), Aligned); 1030 1031 // Remove intrinsic. 1032 CI->eraseFromParent(); 1033 return; 1034 } 1035 1036 Value *Rep; 1037 // Upgrade packed integer vector compare intrinsics to compare instructions. 1038 if (IsX86 && (Name.startswith("sse2.pcmp") || 1039 Name.startswith("avx2.pcmp"))) { 1040 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." 1041 bool CmpEq = Name[9] == 'e'; 1042 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, 1043 CI->getArgOperand(0), CI->getArgOperand(1)); 1044 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 1045 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { 1046 Type *ExtTy = Type::getInt32Ty(C); 1047 if (CI->getOperand(0)->getType()->isIntegerTy(8)) 1048 ExtTy = Type::getInt64Ty(C); 1049 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 1050 ExtTy->getPrimitiveSizeInBits(); 1051 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); 1052 Rep = Builder.CreateVectorSplat(NumElts, Rep); 1053 } else if (IsX86 && (Name.startswith("avx512.ptestm") || 1054 Name.startswith("avx512.ptestnm"))) { 1055 Value *Op0 = CI->getArgOperand(0); 1056 Value *Op1 = CI->getArgOperand(1); 1057 Value *Mask = CI->getArgOperand(2); 1058 Rep = Builder.CreateAnd(Op0, Op1); 1059 llvm::Type *Ty = Op0->getType(); 1060 Value *Zero = llvm::Constant::getNullValue(Ty); 1061 ICmpInst::Predicate Pred = 1062 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1063 Rep = Builder.CreateICmp(Pred, Rep, Zero); 1064 unsigned NumElts = Op0->getType()->getVectorNumElements(); 1065 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts); 1066 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ 1067 unsigned NumElts = 1068 CI->getArgOperand(1)->getType()->getVectorNumElements(); 1069 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); 1070 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1071 CI->getArgOperand(1)); 1072 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { 1073 unsigned NumElts = CI->getType()->getScalarSizeInBits(); 1074 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); 1075 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); 1076 uint32_t Indices[64]; 1077 for (unsigned i = 0; i != NumElts; ++i) 1078 Indices[i] = i; 1079 1080 // First extract half of each vector. This gives better codegen than 1081 // doing it in a single shuffle. 1082 LHS = Builder.CreateShuffleVector(LHS, LHS, 1083 makeArrayRef(Indices, NumElts / 2)); 1084 RHS = Builder.CreateShuffleVector(RHS, RHS, 1085 makeArrayRef(Indices, NumElts / 2)); 1086 // Concat the vectors. 
      Rep = Builder.CreateShuffleVector(LHS, RHS,
                                        makeArrayRef(Indices, NumElts));
      Rep = Builder.CreateBitCast(Rep, CI->getType());
    } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
                                        Builder.CreateFAdd(Elt0, Elt1),
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
                                        Builder.CreateFSub(Elt0, Elt1),
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
                                        Builder.CreateFMul(Elt0, Elt1),
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
      Type *I32Ty = Type::getInt32Ty(C);
      Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                                 ConstantInt::get(I32Ty, 0));
      Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                                 ConstantInt::get(I32Ty, 0));
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
                                        Builder.CreateFDiv(Elt0, Elt1),
                                        ConstantInt::get(I32Ty, 0));
    } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
      // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
      bool CmpEq = Name[16] == 'e';
      Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
    } else if (IsX86 && Name.startswith("avx512.mask.cmp")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
    } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr,
                                   Op->getType()->getVectorNumElements());
    } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                         Name == "ssse3.pabs.w.128" ||
                         Name == "ssse3.pabs.d.128" ||
                         Name.startswith("avx2.pabs") ||
                         Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd."))) {
      // Lossless i32/float to double conversion.
      // Extract the bottom elements if necessary and convert to double vector.
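      // For example, sse2.cvtdq2pd takes a <4 x i32> source but returns a
      // <2 x double>, so only the two low elements are shuffled out and
      // converted.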
      Value *Src = CI->getArgOperand(0);
      VectorType *SrcTy = cast<VectorType>(Src->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = CI->getArgOperand(0);

      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
                                          ShuffleMask);
      }

      bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
      bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
      if (SInt2Double)
        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
      else if (UInt2Double)
        Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
      else
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(9); // strip off "xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                  Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 =
          Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      unsigned NumSrcElts =
          CI->getArgOperand(0)->getType()->getVectorNumElements();
      unsigned NumDstElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
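      // For example, a 16-byte psrl.dq.bs with an immediate of 4 becomes a
      // byte shuffle that pulls four zero bytes in at the top of the vector.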
1414 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1415 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
1416 } else if (IsX86 && (Name == "sse41.pblendw" ||
1417 Name.startswith("sse41.blendp") ||
1418 Name.startswith("avx.blend.p") ||
1419 Name == "avx2.pblendw" ||
1420 Name.startswith("avx2.pblendd."))) {
1421 Value *Op0 = CI->getArgOperand(0);
1422 Value *Op1 = CI->getArgOperand(1);
1423 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1424 VectorType *VecTy = cast<VectorType>(CI->getType());
1425 unsigned NumElts = VecTy->getNumElements();
1426
1427 SmallVector<uint32_t, 16> Idxs(NumElts);
1428 for (unsigned i = 0; i != NumElts; ++i)
1429 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
1430
1431 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1432 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
1433 Name == "avx2.vinserti128" ||
1434 Name.startswith("avx512.mask.insert"))) {
1435 Value *Op0 = CI->getArgOperand(0);
1436 Value *Op1 = CI->getArgOperand(1);
1437 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1438 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1439 unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
1440 unsigned Scale = DstNumElts / SrcNumElts;
1441
1442 // Mask off the high bits of the immediate value; hardware ignores those.
1443 Imm = Imm % Scale;
1444
1445 // Extend the second operand into a vector the size of the destination.
1446 Value *UndefV = UndefValue::get(Op1->getType());
1447 SmallVector<uint32_t, 8> Idxs(DstNumElts);
1448 for (unsigned i = 0; i != SrcNumElts; ++i)
1449 Idxs[i] = i;
1450 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
1451 Idxs[i] = SrcNumElts;
1452 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
1453
1454 // Insert the second operand into the first operand.
1455
1456 // Note that there is no guarantee that instruction lowering will actually
1457 // produce a vinsertf128 instruction for the created shuffles. In
1458 // particular, the 0 immediate case involves no lane changes, so it can
1459 // be handled as a blend.
1460
1461 // Example of shuffle mask for 32-bit elements:
1462 // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1463 // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
1464
1465 // First fill with the identity mask.
1466 for (unsigned i = 0; i != DstNumElts; ++i)
1467 Idxs[i] = i;
1468 // Then replace the elements where we need to insert.
1469 for (unsigned i = 0; i != SrcNumElts; ++i)
1470 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
1471 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
1472
1473 // If the intrinsic has a mask operand, handle that.
1474 if (CI->getNumArgOperands() == 5)
1475 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
1476 CI->getArgOperand(3));
1477 } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
1478 Name == "avx2.vextracti128" ||
1479 Name.startswith("avx512.mask.vextract"))) {
1480 Value *Op0 = CI->getArgOperand(0);
1481 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1482 unsigned DstNumElts = CI->getType()->getVectorNumElements();
1483 unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
1484 unsigned Scale = SrcNumElts / DstNumElts;
1485
1486 // Mask off the high bits of the immediate value; hardware ignores those.
1487 Imm = Imm % Scale;
1488
1489 // Get indexes for the subvector of the input vector.
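// For example, extracting the upper half (Imm = 1) of an <8 x float> source
// into a <4 x float> result uses Idxs = <4, 5, 6, 7>.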
1490 SmallVector<uint32_t, 8> Idxs(DstNumElts); 1491 for (unsigned i = 0; i != DstNumElts; ++i) { 1492 Idxs[i] = i + (Imm * DstNumElts); 1493 } 1494 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1495 1496 // If the intrinsic has a mask operand, handle that. 1497 if (CI->getNumArgOperands() == 4) 1498 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1499 CI->getArgOperand(2)); 1500 } else if (!IsX86 && Name == "stackprotectorcheck") { 1501 Rep = nullptr; 1502 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || 1503 Name.startswith("avx512.mask.perm.di."))) { 1504 Value *Op0 = CI->getArgOperand(0); 1505 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1506 VectorType *VecTy = cast<VectorType>(CI->getType()); 1507 unsigned NumElts = VecTy->getNumElements(); 1508 1509 SmallVector<uint32_t, 8> Idxs(NumElts); 1510 for (unsigned i = 0; i != NumElts; ++i) 1511 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); 1512 1513 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1514 1515 if (CI->getNumArgOperands() == 4) 1516 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1517 CI->getArgOperand(2)); 1518 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") || 1519 Name == "avx2.vperm2i128")) { 1520 // The immediate permute control byte looks like this: 1521 // [1:0] - select 128 bits from sources for low half of destination 1522 // [2] - ignore 1523 // [3] - zero low half of destination 1524 // [5:4] - select 128 bits from sources for high half of destination 1525 // [6] - ignore 1526 // [7] - zero high half of destination 1527 1528 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1529 1530 unsigned NumElts = CI->getType()->getVectorNumElements(); 1531 unsigned HalfSize = NumElts / 2; 1532 SmallVector<uint32_t, 8> ShuffleMask(NumElts); 1533 1534 // Determine which operand(s) are actually in use for this instruction. 1535 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0); 1536 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0); 1537 1538 // If needed, replace operands based on zero mask. 1539 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0; 1540 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1; 1541 1542 // Permute low half of result. 1543 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; 1544 for (unsigned i = 0; i < HalfSize; ++i) 1545 ShuffleMask[i] = StartIndex + i; 1546 1547 // Permute high half of result. 1548 StartIndex = (Imm & 0x10) ? HalfSize : 0; 1549 for (unsigned i = 0; i < HalfSize; ++i) 1550 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; 1551 1552 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 1553 1554 } else if (IsX86 && (Name.startswith("avx.vpermil.") || 1555 Name == "sse2.pshuf.d" || 1556 Name.startswith("avx512.mask.vpermil.p") || 1557 Name.startswith("avx512.mask.pshuf.d."))) { 1558 Value *Op0 = CI->getArgOperand(0); 1559 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1560 VectorType *VecTy = cast<VectorType>(CI->getType()); 1561 unsigned NumElts = VecTy->getNumElements(); 1562 // Calculate the size of each index in the immediate. 1563 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 1564 unsigned IdxMask = ((1 << IdxSize) - 1); 1565 1566 SmallVector<uint32_t, 8> Idxs(NumElts); 1567 // Lookup the bits for this element, wrapping around the immediate every 1568 // 8-bits. 
Elements are grouped into sets of 2 or 4 elements so we need 1569 // to offset by the first index of each group. 1570 for (unsigned i = 0; i != NumElts; ++i) 1571 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 1572 1573 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1574 1575 if (CI->getNumArgOperands() == 4) 1576 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1577 CI->getArgOperand(2)); 1578 } else if (IsX86 && (Name == "sse2.pshufl.w" || 1579 Name.startswith("avx512.mask.pshufl.w."))) { 1580 Value *Op0 = CI->getArgOperand(0); 1581 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1582 unsigned NumElts = CI->getType()->getVectorNumElements(); 1583 1584 SmallVector<uint32_t, 16> Idxs(NumElts); 1585 for (unsigned l = 0; l != NumElts; l += 8) { 1586 for (unsigned i = 0; i != 4; ++i) 1587 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 1588 for (unsigned i = 4; i != 8; ++i) 1589 Idxs[i + l] = i + l; 1590 } 1591 1592 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1593 1594 if (CI->getNumArgOperands() == 4) 1595 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1596 CI->getArgOperand(2)); 1597 } else if (IsX86 && (Name == "sse2.pshufh.w" || 1598 Name.startswith("avx512.mask.pshufh.w."))) { 1599 Value *Op0 = CI->getArgOperand(0); 1600 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1601 unsigned NumElts = CI->getType()->getVectorNumElements(); 1602 1603 SmallVector<uint32_t, 16> Idxs(NumElts); 1604 for (unsigned l = 0; l != NumElts; l += 8) { 1605 for (unsigned i = 0; i != 4; ++i) 1606 Idxs[i + l] = i + l; 1607 for (unsigned i = 0; i != 4; ++i) 1608 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 1609 } 1610 1611 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1612 1613 if (CI->getNumArgOperands() == 4) 1614 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1615 CI->getArgOperand(2)); 1616 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) { 1617 Value *Op0 = CI->getArgOperand(0); 1618 Value *Op1 = CI->getArgOperand(1); 1619 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1620 unsigned NumElts = CI->getType()->getVectorNumElements(); 1621 1622 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1623 unsigned HalfLaneElts = NumLaneElts / 2; 1624 1625 SmallVector<uint32_t, 16> Idxs(NumElts); 1626 for (unsigned i = 0; i != NumElts; ++i) { 1627 // Base index is the starting element of the lane. 1628 Idxs[i] = i - (i % NumLaneElts); 1629 // If we are half way through the lane switch to the other source. 1630 if ((i % NumLaneElts) >= HalfLaneElts) 1631 Idxs[i] += NumElts; 1632 // Now select the specific element. By adding HalfLaneElts bits from 1633 // the immediate. Wrapping around the immediate every 8-bits. 
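// For example, a 128-bit shufps (NumLaneElts = 4, HalfLaneElts = 2) with
// Imm = 0xB1 yields Idxs = <1, 0, 7, 6>: the low half picks elements 1 and 0
// of the first source and the high half picks elements 3 and 2 of the second.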
1634 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); 1635 } 1636 1637 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1638 1639 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 1640 CI->getArgOperand(3)); 1641 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") || 1642 Name.startswith("avx512.mask.movshdup") || 1643 Name.startswith("avx512.mask.movsldup"))) { 1644 Value *Op0 = CI->getArgOperand(0); 1645 unsigned NumElts = CI->getType()->getVectorNumElements(); 1646 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1647 1648 unsigned Offset = 0; 1649 if (Name.startswith("avx512.mask.movshdup.")) 1650 Offset = 1; 1651 1652 SmallVector<uint32_t, 16> Idxs(NumElts); 1653 for (unsigned l = 0; l != NumElts; l += NumLaneElts) 1654 for (unsigned i = 0; i != NumLaneElts; i += 2) { 1655 Idxs[i + l + 0] = i + l + Offset; 1656 Idxs[i + l + 1] = i + l + Offset; 1657 } 1658 1659 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1660 1661 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1662 CI->getArgOperand(1)); 1663 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") || 1664 Name.startswith("avx512.mask.unpckl."))) { 1665 Value *Op0 = CI->getArgOperand(0); 1666 Value *Op1 = CI->getArgOperand(1); 1667 int NumElts = CI->getType()->getVectorNumElements(); 1668 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1669 1670 SmallVector<uint32_t, 64> Idxs(NumElts); 1671 for (int l = 0; l != NumElts; l += NumLaneElts) 1672 for (int i = 0; i != NumLaneElts; ++i) 1673 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 1674 1675 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1676 1677 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1678 CI->getArgOperand(2)); 1679 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") || 1680 Name.startswith("avx512.mask.unpckh."))) { 1681 Value *Op0 = CI->getArgOperand(0); 1682 Value *Op1 = CI->getArgOperand(1); 1683 int NumElts = CI->getType()->getVectorNumElements(); 1684 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1685 1686 SmallVector<uint32_t, 64> Idxs(NumElts); 1687 for (int l = 0; l != NumElts; l += NumLaneElts) 1688 for (int i = 0; i != NumLaneElts; ++i) 1689 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 1690 1691 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1692 1693 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1694 CI->getArgOperand(2)); 1695 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { 1696 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); 1697 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1698 CI->getArgOperand(2)); 1699 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) { 1700 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)), 1701 CI->getArgOperand(1)); 1702 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1703 CI->getArgOperand(2)); 1704 } else if (IsX86 && Name.startswith("avx512.mask.por.")) { 1705 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1)); 1706 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1707 CI->getArgOperand(2)); 1708 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) { 1709 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1)); 1710 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1711 CI->getArgOperand(2)); 1712 } else if (IsX86 && Name.startswith("avx512.mask.and.")) { 1713 VectorType *FTy = cast<VectorType>(CI->getType()); 1714 VectorType *ITy = 
VectorType::getInteger(FTy); 1715 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1716 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1717 Rep = Builder.CreateBitCast(Rep, FTy); 1718 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1719 CI->getArgOperand(2)); 1720 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { 1721 VectorType *FTy = cast<VectorType>(CI->getType()); 1722 VectorType *ITy = VectorType::getInteger(FTy); 1723 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 1724 Rep = Builder.CreateAnd(Rep, 1725 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1726 Rep = Builder.CreateBitCast(Rep, FTy); 1727 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1728 CI->getArgOperand(2)); 1729 } else if (IsX86 && Name.startswith("avx512.mask.or.")) { 1730 VectorType *FTy = cast<VectorType>(CI->getType()); 1731 VectorType *ITy = VectorType::getInteger(FTy); 1732 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1733 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1734 Rep = Builder.CreateBitCast(Rep, FTy); 1735 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1736 CI->getArgOperand(2)); 1737 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { 1738 VectorType *FTy = cast<VectorType>(CI->getType()); 1739 VectorType *ITy = VectorType::getInteger(FTy); 1740 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1741 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1742 Rep = Builder.CreateBitCast(Rep, FTy); 1743 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1744 CI->getArgOperand(2)); 1745 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) { 1746 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 1747 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1748 CI->getArgOperand(2)); 1749 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) { 1750 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); 1751 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1752 CI->getArgOperand(2)); 1753 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { 1754 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 1755 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1756 CI->getArgOperand(2)); 1757 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) { 1758 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 1759 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1760 CI->getArgOperand(2)); 1761 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { 1762 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 1763 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1764 CI->getArgOperand(2)); 1765 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { 1766 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 1767 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1768 CI->getArgOperand(2)); 1769 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { 1770 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 1771 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1772 CI->getArgOperand(2)); 1773 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { 1774 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1775 Intrinsic::ctlz, 1776 CI->getType()), 1777 { CI->getArgOperand(0), Builder.getInt1(false) }); 1778 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 
1779 CI->getArgOperand(1)); 1780 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || 1781 Name.startswith("avx512.mask.min.p"))) { 1782 bool IsMin = Name[13] == 'i'; 1783 VectorType *VecTy = cast<VectorType>(CI->getType()); 1784 unsigned VecWidth = VecTy->getPrimitiveSizeInBits(); 1785 unsigned EltWidth = VecTy->getScalarSizeInBits(); 1786 Intrinsic::ID IID; 1787 if (!IsMin && VecWidth == 128 && EltWidth == 32) 1788 IID = Intrinsic::x86_sse_max_ps; 1789 else if (!IsMin && VecWidth == 128 && EltWidth == 64) 1790 IID = Intrinsic::x86_sse2_max_pd; 1791 else if (!IsMin && VecWidth == 256 && EltWidth == 32) 1792 IID = Intrinsic::x86_avx_max_ps_256; 1793 else if (!IsMin && VecWidth == 256 && EltWidth == 64) 1794 IID = Intrinsic::x86_avx_max_pd_256; 1795 else if (IsMin && VecWidth == 128 && EltWidth == 32) 1796 IID = Intrinsic::x86_sse_min_ps; 1797 else if (IsMin && VecWidth == 128 && EltWidth == 64) 1798 IID = Intrinsic::x86_sse2_min_pd; 1799 else if (IsMin && VecWidth == 256 && EltWidth == 32) 1800 IID = Intrinsic::x86_avx_min_ps_256; 1801 else if (IsMin && VecWidth == 256 && EltWidth == 64) 1802 IID = Intrinsic::x86_avx_min_pd_256; 1803 else 1804 llvm_unreachable("Unexpected intrinsic"); 1805 1806 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1807 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1808 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1809 CI->getArgOperand(2)); 1810 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) { 1811 VectorType *VecTy = cast<VectorType>(CI->getType()); 1812 Intrinsic::ID IID; 1813 if (VecTy->getPrimitiveSizeInBits() == 128) 1814 IID = Intrinsic::x86_ssse3_pshuf_b_128; 1815 else if (VecTy->getPrimitiveSizeInBits() == 256) 1816 IID = Intrinsic::x86_avx2_pshuf_b; 1817 else if (VecTy->getPrimitiveSizeInBits() == 512) 1818 IID = Intrinsic::x86_avx512_pshuf_b_512; 1819 else 1820 llvm_unreachable("Unexpected intrinsic"); 1821 1822 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1823 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1824 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1825 CI->getArgOperand(2)); 1826 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") || 1827 Name.startswith("avx512.mask.pmulu.dq."))) { 1828 bool IsUnsigned = Name[16] == 'u'; 1829 VectorType *VecTy = cast<VectorType>(CI->getType()); 1830 Intrinsic::ID IID; 1831 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) 1832 IID = Intrinsic::x86_sse41_pmuldq; 1833 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) 1834 IID = Intrinsic::x86_avx2_pmul_dq; 1835 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) 1836 IID = Intrinsic::x86_avx512_pmul_dq_512; 1837 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) 1838 IID = Intrinsic::x86_sse2_pmulu_dq; 1839 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) 1840 IID = Intrinsic::x86_avx2_pmulu_dq; 1841 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) 1842 IID = Intrinsic::x86_avx512_pmulu_dq_512; 1843 else 1844 llvm_unreachable("Unexpected intrinsic"); 1845 1846 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1847 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1848 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1849 CI->getArgOperand(2)); 1850 } else if (IsX86 && Name.startswith("avx512.mask.pack")) { 1851 bool IsUnsigned = Name[16] == 'u'; 1852 bool IsDW = Name[18] == 'd'; 1853 VectorType *VecTy = cast<VectorType>(CI->getType()); 1854 
Intrinsic::ID IID; 1855 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1856 IID = Intrinsic::x86_sse2_packsswb_128; 1857 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1858 IID = Intrinsic::x86_avx2_packsswb; 1859 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1860 IID = Intrinsic::x86_avx512_packsswb_512; 1861 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1862 IID = Intrinsic::x86_sse2_packssdw_128; 1863 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1864 IID = Intrinsic::x86_avx2_packssdw; 1865 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1866 IID = Intrinsic::x86_avx512_packssdw_512; 1867 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1868 IID = Intrinsic::x86_sse2_packuswb_128; 1869 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1870 IID = Intrinsic::x86_avx2_packuswb; 1871 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1872 IID = Intrinsic::x86_avx512_packuswb_512; 1873 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1874 IID = Intrinsic::x86_sse41_packusdw; 1875 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1876 IID = Intrinsic::x86_avx2_packusdw; 1877 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1878 IID = Intrinsic::x86_avx512_packusdw_512; 1879 else 1880 llvm_unreachable("Unexpected intrinsic"); 1881 1882 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1883 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1884 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1885 CI->getArgOperand(2)); 1886 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { 1887 bool IsImmediate = Name[16] == 'i' || 1888 (Name.size() > 18 && Name[18] == 'i'); 1889 bool IsVariable = Name[16] == 'v'; 1890 char Size = Name[16] == '.' ? Name[17] : 1891 Name[17] == '.' ? Name[18] : 1892 Name[18] == '.' ? Name[19] : 1893 Name[20]; 1894 1895 Intrinsic::ID IID; 1896 if (IsVariable && Name[17] != '.') { 1897 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di 1898 IID = Intrinsic::x86_avx2_psllv_q; 1899 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di 1900 IID = Intrinsic::x86_avx2_psllv_q_256; 1901 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si 1902 IID = Intrinsic::x86_avx2_psllv_d; 1903 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si 1904 IID = Intrinsic::x86_avx2_psllv_d_256; 1905 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi 1906 IID = Intrinsic::x86_avx512_psllv_w_128; 1907 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi 1908 IID = Intrinsic::x86_avx512_psllv_w_256; 1909 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi 1910 IID = Intrinsic::x86_avx512_psllv_w_512; 1911 else 1912 llvm_unreachable("Unexpected size"); 1913 } else if (Name.endswith(".128")) { 1914 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 1915 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d 1916 : Intrinsic::x86_sse2_psll_d; 1917 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 1918 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q 1919 : Intrinsic::x86_sse2_psll_q; 1920 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 1921 IID = IsImmediate ? 
Intrinsic::x86_sse2_pslli_w 1922 : Intrinsic::x86_sse2_psll_w; 1923 else 1924 llvm_unreachable("Unexpected size"); 1925 } else if (Name.endswith(".256")) { 1926 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 1927 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d 1928 : Intrinsic::x86_avx2_psll_d; 1929 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 1930 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q 1931 : Intrinsic::x86_avx2_psll_q; 1932 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 1933 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w 1934 : Intrinsic::x86_avx2_psll_w; 1935 else 1936 llvm_unreachable("Unexpected size"); 1937 } else { 1938 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 1939 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 : 1940 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 : 1941 Intrinsic::x86_avx512_psll_d_512; 1942 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 1943 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 : 1944 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 : 1945 Intrinsic::x86_avx512_psll_q_512; 1946 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w 1947 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 1948 : Intrinsic::x86_avx512_psll_w_512; 1949 else 1950 llvm_unreachable("Unexpected size"); 1951 } 1952 1953 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 1954 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) { 1955 bool IsImmediate = Name[16] == 'i' || 1956 (Name.size() > 18 && Name[18] == 'i'); 1957 bool IsVariable = Name[16] == 'v'; 1958 char Size = Name[16] == '.' ? Name[17] : 1959 Name[17] == '.' ? Name[18] : 1960 Name[18] == '.' ? Name[19] : 1961 Name[20]; 1962 1963 Intrinsic::ID IID; 1964 if (IsVariable && Name[17] != '.') { 1965 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di 1966 IID = Intrinsic::x86_avx2_psrlv_q; 1967 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di 1968 IID = Intrinsic::x86_avx2_psrlv_q_256; 1969 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si 1970 IID = Intrinsic::x86_avx2_psrlv_d; 1971 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si 1972 IID = Intrinsic::x86_avx2_psrlv_d_256; 1973 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi 1974 IID = Intrinsic::x86_avx512_psrlv_w_128; 1975 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi 1976 IID = Intrinsic::x86_avx512_psrlv_w_256; 1977 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi 1978 IID = Intrinsic::x86_avx512_psrlv_w_512; 1979 else 1980 llvm_unreachable("Unexpected size"); 1981 } else if (Name.endswith(".128")) { 1982 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 1983 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d 1984 : Intrinsic::x86_sse2_psrl_d; 1985 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 1986 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q 1987 : Intrinsic::x86_sse2_psrl_q; 1988 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 1989 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w 1990 : Intrinsic::x86_sse2_psrl_w; 1991 else 1992 llvm_unreachable("Unexpected size"); 1993 } else if (Name.endswith(".256")) { 1994 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 1995 IID = IsImmediate ? 
Intrinsic::x86_avx2_psrli_d 1996 : Intrinsic::x86_avx2_psrl_d; 1997 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 1998 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q 1999 : Intrinsic::x86_avx2_psrl_q; 2000 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 2001 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w 2002 : Intrinsic::x86_avx2_psrl_w; 2003 else 2004 llvm_unreachable("Unexpected size"); 2005 } else { 2006 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 2007 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 : 2008 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 : 2009 Intrinsic::x86_avx512_psrl_d_512; 2010 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 2011 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 : 2012 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 : 2013 Intrinsic::x86_avx512_psrl_q_512; 2014 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) 2015 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 2016 : Intrinsic::x86_avx512_psrl_w_512; 2017 else 2018 llvm_unreachable("Unexpected size"); 2019 } 2020 2021 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2022 } else if (IsX86 && Name.startswith("avx512.mask.psra")) { 2023 bool IsImmediate = Name[16] == 'i' || 2024 (Name.size() > 18 && Name[18] == 'i'); 2025 bool IsVariable = Name[16] == 'v'; 2026 char Size = Name[16] == '.' ? Name[17] : 2027 Name[17] == '.' ? Name[18] : 2028 Name[18] == '.' ? Name[19] : 2029 Name[20]; 2030 2031 Intrinsic::ID IID; 2032 if (IsVariable && Name[17] != '.') { 2033 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si 2034 IID = Intrinsic::x86_avx2_psrav_d; 2035 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si 2036 IID = Intrinsic::x86_avx2_psrav_d_256; 2037 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi 2038 IID = Intrinsic::x86_avx512_psrav_w_128; 2039 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi 2040 IID = Intrinsic::x86_avx512_psrav_w_256; 2041 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi 2042 IID = Intrinsic::x86_avx512_psrav_w_512; 2043 else 2044 llvm_unreachable("Unexpected size"); 2045 } else if (Name.endswith(".128")) { 2046 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 2047 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d 2048 : Intrinsic::x86_sse2_psra_d; 2049 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 2050 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 : 2051 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 : 2052 Intrinsic::x86_avx512_psra_q_128; 2053 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 2054 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w 2055 : Intrinsic::x86_sse2_psra_w; 2056 else 2057 llvm_unreachable("Unexpected size"); 2058 } else if (Name.endswith(".256")) { 2059 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 2060 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d 2061 : Intrinsic::x86_avx2_psra_d; 2062 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 2063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 : 2064 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 : 2065 Intrinsic::x86_avx512_psra_q_256; 2066 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 2067 IID = IsImmediate ? 
Intrinsic::x86_avx2_psrai_w 2068 : Intrinsic::x86_avx2_psra_w; 2069 else 2070 llvm_unreachable("Unexpected size"); 2071 } else { 2072 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 2073 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 : 2074 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 : 2075 Intrinsic::x86_avx512_psra_d_512; 2076 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q 2077 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 : 2078 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 : 2079 Intrinsic::x86_avx512_psra_q_512; 2080 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w 2081 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 2082 : Intrinsic::x86_avx512_psra_w_512; 2083 else 2084 llvm_unreachable("Unexpected size"); 2085 } 2086 2087 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2088 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { 2089 Rep = upgradeMaskedMove(Builder, *CI); 2090 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { 2091 Rep = UpgradeMaskToInt(Builder, *CI); 2092 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) { 2093 Intrinsic::ID IID; 2094 if (Name.endswith("ps.128")) 2095 IID = Intrinsic::x86_avx_vpermilvar_ps; 2096 else if (Name.endswith("pd.128")) 2097 IID = Intrinsic::x86_avx_vpermilvar_pd; 2098 else if (Name.endswith("ps.256")) 2099 IID = Intrinsic::x86_avx_vpermilvar_ps_256; 2100 else if (Name.endswith("pd.256")) 2101 IID = Intrinsic::x86_avx_vpermilvar_pd_256; 2102 else if (Name.endswith("ps.512")) 2103 IID = Intrinsic::x86_avx512_vpermilvar_ps_512; 2104 else if (Name.endswith("pd.512")) 2105 IID = Intrinsic::x86_avx512_vpermilvar_pd_512; 2106 else 2107 llvm_unreachable("Unexpected vpermilvar intrinsic"); 2108 2109 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); 2110 Rep = Builder.CreateCall(Intrin, 2111 { CI->getArgOperand(0), CI->getArgOperand(1) }); 2112 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2113 CI->getArgOperand(2)); 2114 } else if (IsX86 && Name.endswith(".movntdqa")) { 2115 Module *M = F->getParent(); 2116 MDNode *Node = MDNode::get( 2117 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 2118 2119 Value *Ptr = CI->getArgOperand(0); 2120 VectorType *VTy = cast<VectorType>(CI->getType()); 2121 2122 // Convert the type of the pointer to a pointer to the stored type. 
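// The non-temporal load is recreated as an ordinary aligned load; the
// streaming hint is preserved by the !nontemporal metadata attached below.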
2123 Value *BC =
2124 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2125 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2126 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2127 Rep = LI;
2128 } else if (IsX86 &&
2129 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2130 Name.startswith("avx512.mask.pavg"))) {
2131 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2132 // llvm.x86.avx512.mask.pavg.b/w
2133 Value *A = CI->getArgOperand(0);
2134 Value *B = CI->getArgOperand(1);
2135 VectorType *ZextType = VectorType::getExtendedElementVectorType(
2136 cast<VectorType>(A->getType()));
2137 Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2138 Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2139 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2140 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2141 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2142 Rep = Builder.CreateTrunc(ShiftR, A->getType());
2143 if (CI->getNumArgOperands() > 2) {
2144 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2145 CI->getArgOperand(2));
2146 }
2147 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
2148 Value *Arg = CI->getArgOperand(0);
2149 Value *Neg = Builder.CreateNeg(Arg, "neg");
2150 Value *Cmp = Builder.CreateICmpSGE(
2151 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2152 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2153 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
2154 Name == "max.ui" || Name == "max.ull")) {
2155 Value *Arg0 = CI->getArgOperand(0);
2156 Value *Arg1 = CI->getArgOperand(1);
2157 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
2158 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2159 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2160 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2161 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
2162 Name == "min.ui" || Name == "min.ull")) {
2163 Value *Arg0 = CI->getArgOperand(0);
2164 Value *Arg1 = CI->getArgOperand(1);
2165 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
2166 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2167 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2168 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2169 } else if (IsNVVM && Name == "clz.ll") {
2170 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2171 Value *Arg = CI->getArgOperand(0);
2172 Value *Ctlz = Builder.CreateCall(
2173 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
2174 {Arg->getType()}),
2175 {Arg, Builder.getFalse()}, "ctlz");
2176 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2177 } else if (IsNVVM && Name == "popc.ll") {
2178 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2179 // i64.
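// The population count of a 64-bit value is at most 64, so truncating the
// i64 ctpop result back to i32 below cannot lose information.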
2180 Value *Arg = CI->getArgOperand(0); 2181 Value *Popc = Builder.CreateCall( 2182 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 2183 {Arg->getType()}), 2184 Arg, "ctpop"); 2185 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); 2186 } else if (IsNVVM && Name == "h2f") { 2187 Rep = Builder.CreateCall(Intrinsic::getDeclaration( 2188 F->getParent(), Intrinsic::convert_from_fp16, 2189 {Builder.getFloatTy()}), 2190 CI->getArgOperand(0), "h2f"); 2191 } else { 2192 llvm_unreachable("Unknown function for CallInst upgrade."); 2193 } 2194 2195 if (Rep) 2196 CI->replaceAllUsesWith(Rep); 2197 CI->eraseFromParent(); 2198 return; 2199 } 2200 2201 CallInst *NewCall = nullptr; 2202 switch (NewFn->getIntrinsicID()) { 2203 default: { 2204 // Handle generic mangling change, but nothing else 2205 assert( 2206 (CI->getCalledFunction()->getName() != NewFn->getName()) && 2207 "Unknown function for CallInst upgrade and isn't just a name change"); 2208 CI->setCalledFunction(NewFn); 2209 return; 2210 } 2211 2212 case Intrinsic::arm_neon_vld1: 2213 case Intrinsic::arm_neon_vld2: 2214 case Intrinsic::arm_neon_vld3: 2215 case Intrinsic::arm_neon_vld4: 2216 case Intrinsic::arm_neon_vld2lane: 2217 case Intrinsic::arm_neon_vld3lane: 2218 case Intrinsic::arm_neon_vld4lane: 2219 case Intrinsic::arm_neon_vst1: 2220 case Intrinsic::arm_neon_vst2: 2221 case Intrinsic::arm_neon_vst3: 2222 case Intrinsic::arm_neon_vst4: 2223 case Intrinsic::arm_neon_vst2lane: 2224 case Intrinsic::arm_neon_vst3lane: 2225 case Intrinsic::arm_neon_vst4lane: { 2226 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2227 CI->arg_operands().end()); 2228 NewCall = Builder.CreateCall(NewFn, Args); 2229 break; 2230 } 2231 2232 case Intrinsic::bitreverse: 2233 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2234 break; 2235 2236 case Intrinsic::ctlz: 2237 case Intrinsic::cttz: 2238 assert(CI->getNumArgOperands() == 1 && 2239 "Mismatch between function args and call args"); 2240 NewCall = 2241 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); 2242 break; 2243 2244 case Intrinsic::objectsize: { 2245 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 2246 ? Builder.getFalse() 2247 : CI->getArgOperand(2); 2248 NewCall = Builder.CreateCall( 2249 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize}); 2250 break; 2251 } 2252 2253 case Intrinsic::ctpop: 2254 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2255 break; 2256 2257 case Intrinsic::convert_from_fp16: 2258 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2259 break; 2260 2261 case Intrinsic::dbg_value: 2262 // Upgrade from the old version that had an extra offset argument. 2263 assert(CI->getNumArgOperands() == 4); 2264 // Drop nonzero offsets instead of attempting to upgrade them. 
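// Only a constant zero offset maps directly onto the new three-operand
// dbg.value form; calls with any other offset are simply erased below.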
2265 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) 2266 if (Offset->isZeroValue()) { 2267 NewCall = Builder.CreateCall( 2268 NewFn, 2269 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); 2270 break; 2271 } 2272 CI->eraseFromParent(); 2273 return; 2274 2275 case Intrinsic::x86_xop_vfrcz_ss: 2276 case Intrinsic::x86_xop_vfrcz_sd: 2277 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); 2278 break; 2279 2280 case Intrinsic::x86_xop_vpermil2pd: 2281 case Intrinsic::x86_xop_vpermil2ps: 2282 case Intrinsic::x86_xop_vpermil2pd_256: 2283 case Intrinsic::x86_xop_vpermil2ps_256: { 2284 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2285 CI->arg_operands().end()); 2286 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 2287 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 2288 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 2289 NewCall = Builder.CreateCall(NewFn, Args); 2290 break; 2291 } 2292 2293 case Intrinsic::x86_sse41_ptestc: 2294 case Intrinsic::x86_sse41_ptestz: 2295 case Intrinsic::x86_sse41_ptestnzc: { 2296 // The arguments for these intrinsics used to be v4f32, and changed 2297 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 2298 // So, the only thing required is a bitcast for both arguments. 2299 // First, check the arguments have the old type. 2300 Value *Arg0 = CI->getArgOperand(0); 2301 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 2302 return; 2303 2304 // Old intrinsic, add bitcasts 2305 Value *Arg1 = CI->getArgOperand(1); 2306 2307 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 2308 2309 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 2310 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 2311 2312 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); 2313 break; 2314 } 2315 2316 case Intrinsic::x86_sse41_insertps: 2317 case Intrinsic::x86_sse41_dppd: 2318 case Intrinsic::x86_sse41_dpps: 2319 case Intrinsic::x86_sse41_mpsadbw: 2320 case Intrinsic::x86_avx_dp_ps_256: 2321 case Intrinsic::x86_avx2_mpsadbw: { 2322 // Need to truncate the last argument from i32 to i8 -- this argument models 2323 // an inherently 8-bit immediate operand to these x86 instructions. 2324 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2325 CI->arg_operands().end()); 2326 2327 // Replace the last argument with a trunc. 
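// For example, an old call passing the immediate as i32 7 now passes i8 7;
// since the operand models an inherently 8-bit immediate, the truncation
// drops nothing meaningful.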
2328 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 2329 NewCall = Builder.CreateCall(NewFn, Args); 2330 break; 2331 } 2332 2333 case Intrinsic::thread_pointer: { 2334 NewCall = Builder.CreateCall(NewFn, {}); 2335 break; 2336 } 2337 2338 case Intrinsic::invariant_start: 2339 case Intrinsic::invariant_end: 2340 case Intrinsic::masked_load: 2341 case Intrinsic::masked_store: 2342 case Intrinsic::masked_gather: 2343 case Intrinsic::masked_scatter: { 2344 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2345 CI->arg_operands().end()); 2346 NewCall = Builder.CreateCall(NewFn, Args); 2347 break; 2348 } 2349 } 2350 assert(NewCall && "Should have either set this variable or returned through " 2351 "the default case"); 2352 std::string Name = CI->getName(); 2353 if (!Name.empty()) { 2354 CI->setName(Name + ".old"); 2355 NewCall->setName(Name); 2356 } 2357 CI->replaceAllUsesWith(NewCall); 2358 CI->eraseFromParent(); 2359 } 2360 2361 void llvm::UpgradeCallsToIntrinsic(Function *F) { 2362 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 2363 2364 // Check if this function should be upgraded and get the replacement function 2365 // if there is one. 2366 Function *NewFn; 2367 if (UpgradeIntrinsicFunction(F, NewFn)) { 2368 // Replace all users of the old function with the new function or new 2369 // instructions. This is not a range loop because the call is deleted. 2370 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 2371 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 2372 UpgradeIntrinsicCall(CI, NewFn); 2373 2374 // Remove old function, no longer used, from the module. 2375 F->eraseFromParent(); 2376 } 2377 } 2378 2379 MDNode *llvm::UpgradeTBAANode(MDNode &MD) { 2380 // Check if the tag uses struct-path aware TBAA format. 2381 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3) 2382 return &MD; 2383 2384 auto &Context = MD.getContext(); 2385 if (MD.getNumOperands() == 3) { 2386 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)}; 2387 MDNode *ScalarType = MDNode::get(Context, Elts); 2388 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 2389 Metadata *Elts2[] = {ScalarType, ScalarType, 2390 ConstantAsMetadata::get( 2391 Constant::getNullValue(Type::getInt64Ty(Context))), 2392 MD.getOperand(2)}; 2393 return MDNode::get(Context, Elts2); 2394 } 2395 // Create a MDNode <MD, MD, offset 0> 2396 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue( 2397 Type::getInt64Ty(Context)))}; 2398 return MDNode::get(Context, Elts); 2399 } 2400 2401 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 2402 Instruction *&Temp) { 2403 if (Opc != Instruction::BitCast) 2404 return nullptr; 2405 2406 Temp = nullptr; 2407 Type *SrcTy = V->getType(); 2408 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 2409 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 2410 LLVMContext &Context = V->getContext(); 2411 2412 // We have no information about target data layout, so we assume that 2413 // the maximum pointer size is 64bit. 
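// The address-space-changing cast is rewritten as a ptrtoint/inttoptr pair
// through i64; under the 64-bit assumption stated above, the pointer value
// round-trips unchanged.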
2414 Type *MidTy = Type::getInt64Ty(Context);
2415 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
2416
2417 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
2418 }
2419
2420 return nullptr;
2421 }
2422
2423 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
2424 if (Opc != Instruction::BitCast)
2425 return nullptr;
2426
2427 Type *SrcTy = C->getType();
2428 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
2429 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
2430 LLVMContext &Context = C->getContext();
2431
2432 // We have no information about target data layout, so we assume that
2433 // the maximum pointer size is 64bit.
2434 Type *MidTy = Type::getInt64Ty(Context);
2435
2436 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
2437 DestTy);
2438 }
2439
2440 return nullptr;
2441 }
2442
2443 /// Check the debug info version number; if it is outdated, drop the debug
2444 /// info. Return true if the module is modified.
2445 bool llvm::UpgradeDebugInfo(Module &M) {
2446 unsigned Version = getDebugMetadataVersionFromModule(M);
2447 if (Version == DEBUG_METADATA_VERSION) {
2448 bool BrokenDebugInfo = false;
2449 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
2450 report_fatal_error("Broken module found, compilation aborted!");
2451 if (!BrokenDebugInfo)
2452 // Everything is ok.
2453 return false;
2454 else {
2455 // Diagnose malformed debug info.
2456 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
2457 M.getContext().diagnose(Diag);
2458 }
2459 }
2460 bool Modified = StripDebugInfo(M);
2461 if (Modified && Version != DEBUG_METADATA_VERSION) {
2462 // Diagnose a version mismatch.
2463 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2464 M.getContext().diagnose(DiagVersion);
2465 }
2466 return Modified;
2467 }
2468
2469 bool llvm::UpgradeModuleFlags(Module &M) {
2470 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2471 if (!ModFlags)
2472 return false;
2473
2474 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
2475 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
2476 MDNode *Op = ModFlags->getOperand(I);
2477 if (Op->getNumOperands() != 3)
2478 continue;
2479 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
2480 if (!ID)
2481 continue;
2482 if (ID->getString() == "Objective-C Image Info Version")
2483 HasObjCFlag = true;
2484 if (ID->getString() == "Objective-C Class Properties")
2485 HasClassProperties = true;
2486 // Upgrade the PIC/PIE module flags. The flag behavior for these two
2487 // flags was Error and is now Max.
2488 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
2489 if (auto *Behavior =
2490 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
2491 if (Behavior->getLimitedValue() == Module::Error) {
2492 Type *Int32Ty = Type::getInt32Ty(M.getContext());
2493 Metadata *Ops[3] = {
2494 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
2495 MDString::get(M.getContext(), ID->getString()),
2496 Op->getOperand(2)};
2497 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2498 Changed = true;
2499 }
2500 }
2501 }
2502 // Upgrade the Objective-C Image Info Section flag. Remove the whitespace
2503 // in the section name so that llvm-lto will not complain about
2504 // mismatching module flags that are functionally the same.
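// For instance, an (illustrative) section value such as
// "__DATA, __objc_imageinfo, regular, no_dead_strip" becomes
// "__DATA,__objc_imageinfo,regular,no_dead_strip"; every space in the value
// is removed below.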
2505 if (ID->getString() == "Objective-C Image Info Section") {
2506 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
2507 SmallVector<StringRef, 4> ValueComp;
2508 Value->getString().split(ValueComp, " ");
2509 if (ValueComp.size() != 1) {
2510 std::string NewValue;
2511 for (auto &S : ValueComp)
2512 NewValue += S.str();
2513 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
2514 MDString::get(M.getContext(), NewValue)};
2515 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
2516 Changed = true;
2517 }
2518 }
2519 }
2520 }
2521
2522 // "Objective-C Class Properties" was recently added for Objective-C. We
2523 // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
2524 // flag of value 0, so that we can correctly downgrade this flag when
2525 // linking an ObjC bitcode file that lacks this module flag with one that
2526 // has it.
2527 if (HasObjCFlag && !HasClassProperties) {
2528 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
2529 (uint32_t)0);
2530 Changed = true;
2531 }
2532
2533 return Changed;
2534 }
2535
2536 void llvm::UpgradeSectionAttributes(Module &M) {
2537 auto TrimSpaces = [](StringRef Section) -> std::string {
2538 SmallVector<StringRef, 5> Components;
2539 Section.split(Components, ',');
2540
2541 SmallString<32> Buffer;
2542 raw_svector_ostream OS(Buffer);
2543
2544 for (auto Component : Components)
2545 OS << ',' << Component.trim();
2546
2547 return OS.str().substr(1);
2548 };
2549
2550 for (auto &GV : M.globals()) {
2551 if (!GV.hasSection())
2552 continue;
2553
2554 StringRef Section = GV.getSection();
2555
2556 if (!Section.startswith("__DATA, __objc_catlist"))
2557 continue;
2558
2559 // __DATA, __objc_catlist, regular, no_dead_strip
2560 // __DATA,__objc_catlist,regular,no_dead_strip
2561 GV.setSection(TrimSpaces(Section));
2562 }
2563 }
2564
2565 static bool isOldLoopArgument(Metadata *MD) {
2566 auto *T = dyn_cast_or_null<MDTuple>(MD);
2567 if (!T)
2568 return false;
2569 if (T->getNumOperands() < 1)
2570 return false;
2571 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
2572 if (!S)
2573 return false;
2574 return S->getString().startswith("llvm.vectorizer.");
2575 }
2576
2577 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
2578 StringRef OldPrefix = "llvm.vectorizer.";
2579 assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
2580
2581 if (OldTag == "llvm.vectorizer.unroll")
2582 return MDString::get(C, "llvm.loop.interleave.count");
2583
2584 return MDString::get(
2585 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
2586 .str());
2587 }
2588
2589 static Metadata *upgradeLoopArgument(Metadata *MD) {
2590 auto *T = dyn_cast_or_null<MDTuple>(MD);
2591 if (!T)
2592 return MD;
2593 if (T->getNumOperands() < 1)
2594 return MD;
2595 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
2596 if (!OldTag)
2597 return MD;
2598 if (!OldTag->getString().startswith("llvm.vectorizer."))
2599 return MD;
2600
2601 // This has an old tag. Upgrade it.
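// For example, !{!"llvm.vectorizer.width", i32 4} becomes
// !{!"llvm.loop.vectorize.width", i32 4}; the one special case,
// "llvm.vectorizer.unroll", maps to "llvm.loop.interleave.count"
// (see upgradeLoopTag above).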
2602 SmallVector<Metadata *, 8> Ops; 2603 Ops.reserve(T->getNumOperands()); 2604 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 2605 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 2606 Ops.push_back(T->getOperand(I)); 2607 2608 return MDTuple::get(T->getContext(), Ops); 2609 } 2610 2611 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 2612 auto *T = dyn_cast<MDTuple>(&N); 2613 if (!T) 2614 return &N; 2615 2616 if (none_of(T->operands(), isOldLoopArgument)) 2617 return &N; 2618 2619 SmallVector<Metadata *, 8> Ops; 2620 Ops.reserve(T->getNumOperands()); 2621 for (Metadata *MD : T->operands()) 2622 Ops.push_back(upgradeLoopArgument(MD)); 2623 2624 return MDTuple::get(T->getContext(), Ops); 2625 } 2626