//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
                                 Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'i': {
    if (Name.startswith("invariant.start")) {
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::invariant_start, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.end")) {
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[2]};
      if (F->getName() !=
          Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::invariant_end, ObjectPtr);
        return true;
      }
    }
    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    break;
  }

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }

  case 'x': {
    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);

    if (IsX86 &&
        (Name.startswith("sse2.pcmpeq.") ||
         Name.startswith("sse2.pcmpgt.") ||
         Name.startswith("avx2.pcmpeq.") ||
         Name.startswith("avx2.pcmpgt.") ||
         Name.startswith("avx512.mask.pcmpeq.") ||
         Name.startswith("avx512.mask.pcmpgt.") ||
         Name == "sse41.pmaxsb" ||
         Name == "sse2.pmaxs.w" ||
         Name == "sse41.pmaxsd" ||
         Name == "sse2.pmaxu.b" ||
         Name == "sse41.pmaxuw" ||
         Name == "sse41.pmaxud" ||
         Name == "sse41.pminsb" ||
         Name == "sse2.pmins.w" ||
         Name == "sse41.pminsd" ||
         Name == "sse2.pminu.b" ||
         Name == "sse41.pminuw" ||
         Name == "sse41.pminud" ||
         Name.startswith("avx2.pmax") ||
         Name.startswith("avx2.pmin") ||
         Name.startswith("avx512.mask.pmax") ||
         Name.startswith("avx512.mask.pmin") ||
         Name.startswith("avx2.vbroadcast") ||
         Name.startswith("avx2.pbroadcast") ||
         Name.startswith("avx.vpermil.") ||
         Name.startswith("sse2.pshuf") ||
         Name.startswith("avx512.pbroadcast") ||
         Name.startswith("avx512.mask.broadcast.s") ||
         Name.startswith("avx512.mask.movddup") ||
         Name.startswith("avx512.mask.movshdup") ||
         Name.startswith("avx512.mask.movsldup") ||
         Name.startswith("avx512.mask.pshuf.d.") ||
         Name.startswith("avx512.mask.pshufl.w.") ||
         Name.startswith("avx512.mask.pshufh.w.") ||
         Name.startswith("avx512.mask.shuf.p") ||
         Name.startswith("avx512.mask.vpermil.p") ||
         Name.startswith("avx512.mask.perm.df.") ||
         Name.startswith("avx512.mask.perm.di.") ||
         Name.startswith("avx512.mask.punpckl") ||
         Name.startswith("avx512.mask.punpckh") ||
         Name.startswith("avx512.mask.unpckl.") ||
         Name.startswith("avx512.mask.unpckh.") ||
         Name.startswith("avx512.mask.pand.") ||
         Name.startswith("avx512.mask.pandn.") ||
         Name.startswith("avx512.mask.por.") ||
         Name.startswith("avx512.mask.pxor.") ||
         Name.startswith("avx512.mask.and.") ||
         Name.startswith("avx512.mask.andn.") ||
         Name.startswith("avx512.mask.or.") ||
         Name.startswith("avx512.mask.xor.") ||
         Name.startswith("avx512.mask.padd.") ||
         Name.startswith("avx512.mask.psub.") ||
         Name.startswith("avx512.mask.pmull.") ||
         Name.startswith("avx512.mask.add.pd.128") ||
         Name.startswith("avx512.mask.add.pd.256") ||
         Name.startswith("avx512.mask.add.ps.128") ||
         Name.startswith("avx512.mask.add.ps.256") ||
         Name.startswith("avx512.mask.div.pd.128") ||
         Name.startswith("avx512.mask.div.pd.256") ||
         Name.startswith("avx512.mask.div.ps.128") ||
         Name.startswith("avx512.mask.div.ps.256") ||
         Name.startswith("avx512.mask.mul.pd.128") ||
         Name.startswith("avx512.mask.mul.pd.256") ||
         Name.startswith("avx512.mask.mul.ps.128") ||
         Name.startswith("avx512.mask.mul.ps.256") ||
         Name.startswith("avx512.mask.sub.pd.128") ||
         Name.startswith("avx512.mask.sub.pd.256") ||
         Name.startswith("avx512.mask.sub.ps.128") ||
         Name.startswith("avx512.mask.sub.ps.256") ||
         Name.startswith("sse41.pmovsx") ||
         Name.startswith("sse41.pmovzx") ||
         Name.startswith("avx2.pmovsx") ||
         Name.startswith("avx2.pmovzx") ||
         Name == "sse2.cvtdq2pd" ||
         Name == "sse2.cvtps2pd" ||
         Name == "avx.cvtdq2.pd.256" ||
         Name == "avx.cvt.ps2.pd.256" ||
         Name.startswith("avx.vinsertf128.") ||
         Name == "avx2.vinserti128" ||
         Name.startswith("avx.vextractf128.") ||
         Name == "avx2.vextracti128" ||
         Name.startswith("sse4a.movnt.") ||
         Name.startswith("avx.movnt.") ||
         Name.startswith("avx512.storent.") ||
         Name == "sse2.storel.dq" ||
         Name.startswith("sse.storeu.") ||
         Name.startswith("sse2.storeu.") ||
         Name.startswith("avx.storeu.") ||
         Name.startswith("avx512.mask.storeu.") ||
         Name.startswith("avx512.mask.store.p") ||
         Name.startswith("avx512.mask.store.b.") ||
         Name.startswith("avx512.mask.store.w.") ||
         Name.startswith("avx512.mask.store.d.") ||
         Name.startswith("avx512.mask.store.q.") ||
         Name.startswith("avx512.mask.loadu.") ||
         Name.startswith("avx512.mask.load.") ||
         Name == "sse42.crc32.64.8" ||
         Name.startswith("avx.vbroadcast.s") ||
         Name.startswith("avx512.mask.palignr.") ||
         Name.startswith("sse2.psll.dq") ||
         Name.startswith("sse2.psrl.dq") ||
         Name.startswith("avx2.psll.dq") ||
         Name.startswith("avx2.psrl.dq") ||
         Name.startswith("avx512.psll.dq") ||
         Name.startswith("avx512.psrl.dq") ||
         Name == "sse41.pblendw" ||
         Name.startswith("sse41.blendp") ||
         Name.startswith("avx.blend.p") ||
         Name == "avx2.pblendw" ||
         Name.startswith("avx2.pblendd.") ||
         Name.startswith("avx.vbroadcastf128") ||
         Name == "avx2.vbroadcasti128" ||
         Name == "xop.vpcmov" ||
         (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
      NewFn = nullptr;
      return true;
    }
    // SSE4.1 ptest functions may have an old signature.
    if (IsX86 && Name.startswith("sse41.ptest")) {
      if (Name.substr(11) == "c")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name.substr(11) == "z")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name.substr(11) == "nzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (IsX86 && Name == "sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (IsX86 && Name == "sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (IsX86 && Name == "sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (IsX86 && Name == "sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (IsX86 && Name == "avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (IsX86 && Name == "avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped
    if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
                  Name.startswith("avx512.mask.psrai.") ||
                  Name.startswith("avx512.mask.psrli."))) {
      Intrinsic::ID ShiftID;
      if (Name.slice(12, 16) == "psll")
        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
                                  : Intrinsic::x86_avx512_mask_psll_qi_512;
      else if (Name.slice(12, 16) == "psra")
        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
                                  : Intrinsic::x86_avx512_mask_psra_qi_512;
      else
        ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
                                  : Intrinsic::x86_avx512_mask_psrl_qi_512;
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4
    if (IsX86 && Name.startswith("fma4.")) {
      rename(F);
      NewFn = F;
      return true;
    }
    // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
    if (IsX86 && Name.startswith("xop.vpermil2")) {
      auto Params = F->getFunctionType()->params();
      auto Idx = Params[2];
      if (Idx->getScalarType()->isFloatingPointTy()) {
        rename(F);
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        Intrinsic::ID Permil2ID;
        if (EltSize == 64 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
        NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
        return true;
      }
    }
    break;
  }
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
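  // For example, a v4i32 operation carries its mask in an i8: the bitcast
  // above yields <8 x i1>, and the shuffle below keeps only its first four
  // elements.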
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the unmasked value.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
                                          Value *Op0, Value *Op1, Value *Shift,
                                          Value *Passthru, Value *Mask) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert(NumElts % 16 == 0);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l != NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= 16)
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);

  if (CI.getNumArgOperands() == 4)
    Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();
  Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));

  Value *Mask = CI.getArgOperand(2);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Cmp = Builder.CreateShuffleVector(Cmp,
                                      Constant::getNullValue(Cmp->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    bool IsX86 = Name.startswith("x86.");
    if (IsX86)
      Name = Name.substr(4);

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
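    // For example, llvm.x86.sse2.pcmpeq.d becomes an icmp eq on <4 x i32>
    // followed by a sext of the <4 x i1> result back to <4 x i32>.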
    if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
                  Name.startswith("avx2.pcmpeq."))) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
                         Name.startswith("avx2.pcmpgt."))) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
      Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
    } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
      Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvt.ps2.pd.256")) {
      // Lossless i32/float to double conversion.
      // Extract the bottom elements if necessary and convert to double vector.
      Value *Src = CI->getArgOperand(0);
      VectorType *SrcTy = cast<VectorType>(Src->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = CI->getArgOperand(0);

      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
                                          ShuffleMask);
      }

      bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
      if (Int2Double)
        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
      else
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    } else if (IsX86 && Name.startswith("sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && (Name.startswith("avx.movnt.") ||
                         Name.startswith("avx512.storent."))) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      VectorType *VTy = cast<VectorType>(Arg1->getType());
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
                                                 VTy->getBitWidth() / 8);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && Name == "sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && (Name.startswith("sse.storeu.") ||
                         Name.startswith("sse2.storeu.") ||
                         Name.startswith("avx.storeu."))) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && (Name.startswith("avx512.mask.store."))) {
      UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/true);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1),CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(9); // strip off "xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (IsX86 && Name == "xop.vpcmov") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Sel = CI->getArgOperand(2);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
      Value *NotSel = Builder.CreateXor(Sel, MinusOne);
      Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
      Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        CI->getArgOperand(2),
                                        CI->getArgOperand(3),
                                        CI->getArgOperand(4));
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = i;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i)
        Idxs[i] = Imm ? i : (i + NumElts);
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i)
        Idxs[i] = Imm ? (i + NumElts / 2) : i;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128")) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
      SmallVector<uint32_t, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs[i] = Imm ? (i + NumElts) : i;
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                         Name == "sse2.pshuf.d" ||
                         Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Look up the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
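      // For example, sse2.pshuf.d with Imm = 0x1B on a v4i32 input produces
      // the shuffle mask <3, 2, 1, 0>: element i reads two bits of the
      // immediate and stays within its own group of four elements.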
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
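        // For example, a v4f32 shuf.ps with Imm = 0x4E produces the mask
        // <2, 3, 4, 5>: the low half of the result comes from Op0 and the
        // high half from Op1, each position selected by two immediate bits.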
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
      Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
      Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
                              CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
      Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
      Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.add.pd.128") ||
                         Name.startswith("avx512.mask.add.pd.256") ||
                         Name.startswith("avx512.mask.add.ps.128") ||
                         Name.startswith("avx512.mask.add.ps.256"))) {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.div.pd.128") ||
                         Name.startswith("avx512.mask.div.pd.256") ||
                         Name.startswith("avx512.mask.div.ps.128") ||
                         Name.startswith("avx512.mask.div.ps.256"))) {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.mul.pd.128") ||
                         Name.startswith("avx512.mask.mul.pd.256") ||
                         Name.startswith("avx512.mask.mul.ps.128") ||
                         Name.startswith("avx512.mask.mul.ps.256"))) {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 &&
               (Name.startswith("avx512.mask.sub.pd.128") ||
                Name.startswith("avx512.mask.sub.pd.256") ||
                Name.startswith("avx512.mask.sub.ps.128") ||
                Name.startswith("avx512.mask.sub.ps.256"))) {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::x86_avx512_mask_psll_di_512:
  case Intrinsic::x86_avx512_mask_psra_di_512:
  case Intrinsic::x86_avx512_mask_psrl_di_512:
  case Intrinsic::x86_avx512_mask_psll_qi_512:
  case Intrinsic::x86_avx512_mask_psra_qi_512:
  case Intrinsic::x86_avx512_mask_psrl_qi_512:
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::thread_pointer: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();
    return;
  }
  }
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
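  // For example, an old scalar tag !1 = !{!"int", !0} is rewritten below to
  // the struct-path form !{!1, !1, i64 0}, with the old node serving as both
  // the base and the access type.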
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create an MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create an MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION)
    return false;

  bool RetCode = StripDebugInfo(M);
  if (RetCode) {
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return RetCode;
}

bool llvm::UpgradeModuleFlags(Module &M) {
  const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() < 2)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
  }
  // "Objective-C Class Properties" was recently added for Objective-C.
  // We upgrade ObjC bitcodes to contain an "Objective-C Class Properties"
  // module flag with value 0, so we can correctly downgrade the flag when
  // linking an ObjC bitcode that lacks this module flag against an ObjC
  // bitcode that has it.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    return true;
  }
  return false;
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}