1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 // Upgrade the declarations of the SSE4.1 functions whose arguments have 35 // changed their type from v4f32 to v2i64. 36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 37 Function *&NewFn) { 38 // Check whether this is an old version of the function, which received 39 // v4f32 arguments. 40 Type *Arg0Type = F->getFunctionType()->getParamType(0); 41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 42 return false; 43 44 // Yes, it's old, replace it with new version. 45 F->setName(F->getName() + ".old"); 46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 47 return true; 48 } 49 50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 51 // arguments have changed their type from i32 to i8. 52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 53 Function *&NewFn) { 54 // Check that the last argument is an i32. 55 Type *LastArgType = F->getFunctionType()->getParamType( 56 F->getFunctionType()->getNumParams() - 1); 57 if (!LastArgType->isIntegerTy(32)) 58 return false; 59 60 // Move this function aside and map down. 61 F->setName(F->getName() + ".old"); 62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 63 return true; 64 } 65 66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 67 assert(F && "Illegal to upgrade a non-existent Function."); 68 69 // Quickly eliminate it, if it's not a candidate. 70 StringRef Name = F->getName(); 71 if (Name.size() <= 8 || !Name.startswith("llvm.")) 72 return false; 73 Name = Name.substr(5); // Strip off "llvm." 74 75 switch (Name[0]) { 76 default: break; 77 case 'a': { 78 if (Name.startswith("arm.neon.vclz")) { 79 Type* args[2] = { 80 F->arg_begin()->getType(), 81 Type::getInt1Ty(F->getContext()) 82 }; 83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 84 // the end of the name. Change name from llvm.arm.neon.vclz.* to 85 // llvm.ctlz.* 86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 87 NewFn = Function::Create(fType, F->getLinkage(), 88 "llvm.ctlz." + Name.substr(14), F->getParent()); 89 return true; 90 } 91 if (Name.startswith("arm.neon.vcnt")) { 92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 93 F->arg_begin()->getType()); 94 return true; 95 } 96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 97 if (vldRegex.match(Name)) { 98 auto fArgs = F->getFunctionType()->params(); 99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 100 // Can't use Intrinsic::getDeclaration here as the return types might 101 // then only be structurally equal. 102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 103 NewFn = Function::Create(fType, F->getLinkage(), 104 "llvm." + Name + ".p0i8", F->getParent()); 105 return true; 106 } 107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 108 if (vstRegex.match(Name)) { 109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 110 Intrinsic::arm_neon_vst2, 111 Intrinsic::arm_neon_vst3, 112 Intrinsic::arm_neon_vst4}; 113 114 static const Intrinsic::ID StoreLaneInts[] = { 115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 116 Intrinsic::arm_neon_vst4lane 117 }; 118 119 auto fArgs = F->getFunctionType()->params(); 120 Type *Tys[] = {fArgs[0], fArgs[1]}; 121 if (Name.find("lane") == StringRef::npos) 122 NewFn = Intrinsic::getDeclaration(F->getParent(), 123 StoreInts[fArgs.size() - 3], Tys); 124 else 125 NewFn = Intrinsic::getDeclaration(F->getParent(), 126 StoreLaneInts[fArgs.size() - 5], Tys); 127 return true; 128 } 129 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 131 return true; 132 } 133 break; 134 } 135 136 case 'c': { 137 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 138 F->setName(Name + ".old"); 139 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 140 F->arg_begin()->getType()); 141 return true; 142 } 143 if (Name.startswith("cttz.") && F->arg_size() == 1) { 144 F->setName(Name + ".old"); 145 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 146 F->arg_begin()->getType()); 147 return true; 148 } 149 break; 150 } 151 152 case 'o': 153 // We only need to change the name to match the mangling including the 154 // address space. 155 if (F->arg_size() == 2 && Name.startswith("objectsize.")) { 156 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 157 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 158 F->setName(Name + ".old"); 159 NewFn = Intrinsic::getDeclaration(F->getParent(), 160 Intrinsic::objectsize, Tys); 161 return true; 162 } 163 } 164 break; 165 166 case 's': 167 if (Name == "stackprotectorcheck") { 168 NewFn = nullptr; 169 return true; 170 } 171 172 case 'x': { 173 if (Name.startswith("x86.sse2.pcmpeq.") || 174 Name.startswith("x86.sse2.pcmpgt.") || 175 Name.startswith("x86.avx2.pcmpeq.") || 176 Name.startswith("x86.avx2.pcmpgt.") || 177 Name.startswith("x86.avx2.vbroadcast") || 178 Name.startswith("x86.avx2.pbroadcast") || 179 Name.startswith("x86.avx.vpermil.") || 180 Name.startswith("x86.sse2.pshuf") || 181 Name.startswith("x86.avx512.mask.pshuf.d.") || 182 Name.startswith("x86.avx512.mask.pshufl.w.") || 183 Name.startswith("x86.avx512.mask.pshufh.w.") || 184 Name.startswith("x86.sse41.pmovsx") || 185 Name.startswith("x86.sse41.pmovzx") || 186 Name.startswith("x86.avx2.pmovsx") || 187 Name.startswith("x86.avx2.pmovzx") || 188 Name == "x86.sse2.cvtdq2pd" || 189 Name == "x86.sse2.cvtps2pd" || 190 Name == "x86.avx.cvtdq2.pd.256" || 191 Name == "x86.avx.cvt.ps2.pd.256" || 192 Name == "x86.sse2.cvttps2dq" || 193 Name.startswith("x86.avx.cvtt.") || 194 Name.startswith("x86.avx.vinsertf128.") || 195 Name == "x86.avx2.vinserti128" || 196 Name.startswith("x86.avx.vextractf128.") || 197 Name == "x86.avx2.vextracti128" || 198 Name.startswith("x86.avx.movnt.") || 199 Name == "x86.sse2.storel.dq" || 200 Name.startswith("x86.sse.storeu.") || 201 Name.startswith("x86.sse2.storeu.") || 202 Name.startswith("x86.avx.storeu.") || 203 Name.startswith("x86.avx512.mask.storeu.p") || 204 Name.startswith("x86.avx512.mask.storeu.b.") || 205 Name.startswith("x86.avx512.mask.storeu.w.") || 206 Name.startswith("x86.avx512.mask.storeu.d.") || 207 Name.startswith("x86.avx512.mask.storeu.q.") || 208 Name.startswith("x86.avx512.mask.store.p") || 209 Name.startswith("x86.avx512.mask.store.b.") || 210 Name.startswith("x86.avx512.mask.store.w.") || 211 Name.startswith("x86.avx512.mask.store.d.") || 212 Name.startswith("x86.avx512.mask.store.q.") || 213 Name.startswith("x86.avx512.mask.loadu.p") || 214 Name.startswith("x86.avx512.mask.loadu.b.") || 215 Name.startswith("x86.avx512.mask.loadu.w.") || 216 Name.startswith("x86.avx512.mask.loadu.d.") || 217 Name.startswith("x86.avx512.mask.loadu.q.") || 218 Name.startswith("x86.avx512.mask.load.p") || 219 Name.startswith("x86.avx512.mask.load.b.") || 220 Name.startswith("x86.avx512.mask.load.w.") || 221 Name.startswith("x86.avx512.mask.load.d.") || 222 Name.startswith("x86.avx512.mask.load.q.") || 223 Name == "x86.sse42.crc32.64.8" || 224 Name.startswith("x86.avx.vbroadcast.s") || 225 Name.startswith("x86.avx512.mask.palignr.") || 226 Name.startswith("x86.sse2.psll.dq") || 227 Name.startswith("x86.sse2.psrl.dq") || 228 Name.startswith("x86.avx2.psll.dq") || 229 Name.startswith("x86.avx2.psrl.dq") || 230 Name.startswith("x86.avx512.psll.dq") || 231 Name.startswith("x86.avx512.psrl.dq") || 232 Name == "x86.sse41.pblendw" || 233 Name.startswith("x86.sse41.blendp") || 234 Name.startswith("x86.avx.blend.p") || 235 Name == "x86.avx2.pblendw" || 236 Name.startswith("x86.avx2.pblendd.") || 237 Name == "x86.avx2.vbroadcasti128" || 238 Name == "x86.xop.vpcmov" || 239 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { 240 NewFn = nullptr; 241 return true; 242 } 243 // SSE4.1 ptest functions may have an old signature. 244 if (Name.startswith("x86.sse41.ptest")) { 245 if (Name == "x86.sse41.ptestc") 246 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); 247 if (Name == "x86.sse41.ptestz") 248 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); 249 if (Name == "x86.sse41.ptestnzc") 250 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 251 } 252 // Several blend and other instructions with masks used the wrong number of 253 // bits. 254 if (Name == "x86.sse41.insertps") 255 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 256 NewFn); 257 if (Name == "x86.sse41.dppd") 258 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 259 NewFn); 260 if (Name == "x86.sse41.dpps") 261 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 262 NewFn); 263 if (Name == "x86.sse41.mpsadbw") 264 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 265 NewFn); 266 if (Name == "x86.avx.dp.ps.256") 267 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 268 NewFn); 269 if (Name == "x86.avx2.mpsadbw") 270 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 271 NewFn); 272 273 // frcz.ss/sd may need to have an argument dropped 274 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { 275 F->setName(Name + ".old"); 276 NewFn = Intrinsic::getDeclaration(F->getParent(), 277 Intrinsic::x86_xop_vfrcz_ss); 278 return true; 279 } 280 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { 281 F->setName(Name + ".old"); 282 NewFn = Intrinsic::getDeclaration(F->getParent(), 283 Intrinsic::x86_xop_vfrcz_sd); 284 return true; 285 } 286 // Fix the FMA4 intrinsics to remove the 4 287 if (Name.startswith("x86.fma4.")) { 288 F->setName("llvm.x86.fma" + Name.substr(8)); 289 NewFn = F; 290 return true; 291 } 292 // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 293 if (Name.startswith("x86.xop.vpermil2")) { 294 auto Params = F->getFunctionType()->params(); 295 auto Idx = Params[2]; 296 if (Idx->getScalarType()->isFloatingPointTy()) { 297 F->setName(Name + ".old"); 298 unsigned IdxSize = Idx->getPrimitiveSizeInBits(); 299 unsigned EltSize = Idx->getScalarSizeInBits(); 300 Intrinsic::ID Permil2ID; 301 if (EltSize == 64 && IdxSize == 128) 302 Permil2ID = Intrinsic::x86_xop_vpermil2pd; 303 else if (EltSize == 32 && IdxSize == 128) 304 Permil2ID = Intrinsic::x86_xop_vpermil2ps; 305 else if (EltSize == 64 && IdxSize == 256) 306 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256; 307 else 308 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256; 309 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); 310 return true; 311 } 312 } 313 break; 314 } 315 } 316 317 // This may not belong here. This function is effectively being overloaded 318 // to both detect an intrinsic which needs upgrading, and to provide the 319 // upgraded form of the intrinsic. We should perhaps have two separate 320 // functions for this. 321 return false; 322 } 323 324 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 325 NewFn = nullptr; 326 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 327 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 328 329 // Upgrade intrinsic attributes. This does not change the function. 330 if (NewFn) 331 F = NewFn; 332 if (Intrinsic::ID id = F->getIntrinsicID()) 333 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 334 return Upgraded; 335 } 336 337 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 338 // Nothing to do yet. 339 return false; 340 } 341 342 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them 343 // to byte shuffles. 344 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 345 Value *Op, unsigned Shift) { 346 Type *ResultTy = Op->getType(); 347 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 348 349 // Bitcast from a 64-bit element type to a byte element type. 350 Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts); 351 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 352 353 // We'll be shuffling in zeroes. 354 Value *Res = Constant::getNullValue(VecTy); 355 356 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 357 // we'll just return the zero vector. 358 if (Shift < 16) { 359 uint32_t Idxs[64]; 360 // 256/512-bit version is split into 2/4 16-byte lanes. 361 for (unsigned l = 0; l != NumElts; l += 16) 362 for (unsigned i = 0; i != 16; ++i) { 363 unsigned Idx = NumElts + i - Shift; 364 if (Idx < NumElts) 365 Idx -= NumElts - 16; // end of lane, switch operand. 366 Idxs[l + i] = Idx + l; 367 } 368 369 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); 370 } 371 372 // Bitcast back to a 64-bit element type. 373 return Builder.CreateBitCast(Res, ResultTy, "cast"); 374 } 375 376 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them 377 // to byte shuffles. 378 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 379 Value *Op, 380 unsigned Shift) { 381 Type *ResultTy = Op->getType(); 382 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 383 384 // Bitcast from a 64-bit element type to a byte element type. 385 Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts); 386 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 387 388 // We'll be shuffling in zeroes. 389 Value *Res = Constant::getNullValue(VecTy); 390 391 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 392 // we'll just return the zero vector. 393 if (Shift < 16) { 394 uint32_t Idxs[64]; 395 // 256/512-bit version is split into 2/4 16-byte lanes. 396 for (unsigned l = 0; l != NumElts; l += 16) 397 for (unsigned i = 0; i != 16; ++i) { 398 unsigned Idx = i + Shift; 399 if (Idx >= 16) 400 Idx += NumElts - 16; // end of lane, switch operand. 401 Idxs[l + i] = Idx + l; 402 } 403 404 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); 405 } 406 407 // Bitcast back to a 64-bit element type. 408 return Builder.CreateBitCast(Res, ResultTy, "cast"); 409 } 410 411 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, 412 unsigned NumElts) { 413 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), 414 cast<IntegerType>(Mask->getType())->getBitWidth()); 415 Mask = Builder.CreateBitCast(Mask, MaskTy); 416 417 // If we have less than 8 elements, then the starting mask was an i8 and 418 // we need to extract down to the right number of elements. 419 if (NumElts < 8) { 420 uint32_t Indices[4]; 421 for (unsigned i = 0; i != NumElts; ++i) 422 Indices[i] = i; 423 Mask = Builder.CreateShuffleVector(Mask, Mask, 424 makeArrayRef(Indices, NumElts), 425 "extract"); 426 } 427 428 return Mask; 429 } 430 431 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 432 Value *Op0, Value *Op1) { 433 // If the mask is all ones just emit the align operation. 434 if (const auto *C = dyn_cast<Constant>(Mask)) 435 if (C->isAllOnesValue()) 436 return Op0; 437 438 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 439 return Builder.CreateSelect(Mask, Op0, Op1); 440 } 441 442 static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 443 Value *Op0, Value *Op1, Value *Shift, 444 Value *Passthru, Value *Mask) { 445 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); 446 447 unsigned NumElts = Op0->getType()->getVectorNumElements(); 448 assert(NumElts % 16 == 0); 449 450 // If palignr is shifting the pair of vectors more than the size of two 451 // lanes, emit zero. 452 if (ShiftVal >= 32) 453 return llvm::Constant::getNullValue(Op0->getType()); 454 455 // If palignr is shifting the pair of input vectors more than one lane, 456 // but less than two lanes, convert to shifting in zeroes. 457 if (ShiftVal > 16) { 458 ShiftVal -= 16; 459 Op1 = Op0; 460 Op0 = llvm::Constant::getNullValue(Op0->getType()); 461 } 462 463 uint32_t Indices[64]; 464 // 256-bit palignr operates on 128-bit lanes so we need to handle that 465 for (unsigned l = 0; l != NumElts; l += 16) { 466 for (unsigned i = 0; i != 16; ++i) { 467 unsigned Idx = ShiftVal + i; 468 if (Idx >= 16) 469 Idx += NumElts - 16; // End of lane, switch operand. 470 Indices[l + i] = Idx + l; 471 } 472 } 473 474 Value *Align = Builder.CreateShuffleVector(Op1, Op0, 475 makeArrayRef(Indices, NumElts), 476 "palignr"); 477 478 return EmitX86Select(Builder, Mask, Align, Passthru); 479 } 480 481 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C, 482 Value *Ptr, Value *Data, Value *Mask, 483 bool Aligned) { 484 // Cast the pointer to the right type. 485 Ptr = Builder.CreateBitCast(Ptr, 486 llvm::PointerType::getUnqual(Data->getType())); 487 unsigned Align = 488 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; 489 490 // If the mask is all ones just emit a regular store. 491 if (const auto *C = dyn_cast<Constant>(Mask)) 492 if (C->isAllOnesValue()) 493 return Builder.CreateAlignedStore(Data, Ptr, Align); 494 495 // Convert the mask from an integer type to a vector of i1. 496 unsigned NumElts = Data->getType()->getVectorNumElements(); 497 Mask = getX86MaskVec(Builder, Mask, NumElts); 498 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); 499 } 500 501 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C, 502 Value *Ptr, Value *Passthru, Value *Mask, 503 bool Aligned) { 504 // Cast the pointer to the right type. 505 Ptr = Builder.CreateBitCast(Ptr, 506 llvm::PointerType::getUnqual(Passthru->getType())); 507 unsigned Align = 508 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; 509 510 // If the mask is all ones just emit a regular store. 511 if (const auto *C = dyn_cast<Constant>(Mask)) 512 if (C->isAllOnesValue()) 513 return Builder.CreateAlignedLoad(Ptr, Align); 514 515 // Convert the mask from an integer type to a vector of i1. 516 unsigned NumElts = Passthru->getType()->getVectorNumElements(); 517 Mask = getX86MaskVec(Builder, Mask, NumElts); 518 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); 519 } 520 521 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 522 // upgraded intrinsic. All argument and return casting must be provided in 523 // order to seamlessly integrate with existing context. 524 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 525 Function *F = CI->getCalledFunction(); 526 LLVMContext &C = CI->getContext(); 527 IRBuilder<> Builder(C); 528 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 529 530 assert(F && "Intrinsic call is not direct?"); 531 532 if (!NewFn) { 533 // Get the Function's name. 534 StringRef Name = F->getName(); 535 536 Value *Rep; 537 // Upgrade packed integer vector compares intrinsics to compare instructions 538 if (Name.startswith("llvm.x86.sse2.pcmpeq.") || 539 Name.startswith("llvm.x86.avx2.pcmpeq.")) { 540 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1), 541 "pcmpeq"); 542 // need to sign extend since icmp returns vector of i1 543 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 544 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") || 545 Name.startswith("llvm.x86.avx2.pcmpgt.")) { 546 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), 547 "pcmpgt"); 548 // need to sign extend since icmp returns vector of i1 549 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 550 } else if (Name == "llvm.x86.sse2.cvtdq2pd" || 551 Name == "llvm.x86.sse2.cvtps2pd" || 552 Name == "llvm.x86.avx.cvtdq2.pd.256" || 553 Name == "llvm.x86.avx.cvt.ps2.pd.256") { 554 // Lossless i32/float to double conversion. 555 // Extract the bottom elements if necessary and convert to double vector. 556 Value *Src = CI->getArgOperand(0); 557 VectorType *SrcTy = cast<VectorType>(Src->getType()); 558 VectorType *DstTy = cast<VectorType>(CI->getType()); 559 Rep = CI->getArgOperand(0); 560 561 unsigned NumDstElts = DstTy->getNumElements(); 562 if (NumDstElts < SrcTy->getNumElements()) { 563 assert(NumDstElts == 2 && "Unexpected vector size"); 564 uint32_t ShuffleMask[2] = { 0, 1 }; 565 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), 566 ShuffleMask); 567 } 568 569 bool Int2Double = (StringRef::npos != Name.find("cvtdq2")); 570 if (Int2Double) 571 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); 572 else 573 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 574 } else if (Name == "llvm.x86.sse2.cvttps2dq" || 575 Name.startswith("llvm.x86.avx.cvtt.")) { 576 // Truncation (round to zero) float/double to i32 vector conversion. 577 Value *Src = CI->getArgOperand(0); 578 VectorType *DstTy = cast<VectorType>(CI->getType()); 579 Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt"); 580 } else if (Name.startswith("llvm.x86.avx.movnt.")) { 581 Module *M = F->getParent(); 582 SmallVector<Metadata *, 1> Elts; 583 Elts.push_back( 584 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 585 MDNode *Node = MDNode::get(C, Elts); 586 587 Value *Arg0 = CI->getArgOperand(0); 588 Value *Arg1 = CI->getArgOperand(1); 589 590 // Convert the type of the pointer to a pointer to the stored type. 591 Value *BC = Builder.CreateBitCast(Arg0, 592 PointerType::getUnqual(Arg1->getType()), 593 "cast"); 594 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 32); 595 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 596 597 // Remove intrinsic. 598 CI->eraseFromParent(); 599 return; 600 } else if (Name == "llvm.x86.sse2.storel.dq") { 601 Value *Arg0 = CI->getArgOperand(0); 602 Value *Arg1 = CI->getArgOperand(1); 603 604 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 605 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 606 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); 607 Value *BC = Builder.CreateBitCast(Arg0, 608 PointerType::getUnqual(Elt->getType()), 609 "cast"); 610 Builder.CreateAlignedStore(Elt, BC, 1); 611 612 // Remove intrinsic. 613 CI->eraseFromParent(); 614 return; 615 } else if (Name.startswith("llvm.x86.sse.storeu.") || 616 Name.startswith("llvm.x86.sse2.storeu.") || 617 Name.startswith("llvm.x86.avx.storeu.")) { 618 Value *Arg0 = CI->getArgOperand(0); 619 Value *Arg1 = CI->getArgOperand(1); 620 621 Arg0 = Builder.CreateBitCast(Arg0, 622 PointerType::getUnqual(Arg1->getType()), 623 "cast"); 624 Builder.CreateAlignedStore(Arg1, Arg0, 1); 625 626 // Remove intrinsic. 627 CI->eraseFromParent(); 628 return; 629 } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") || 630 Name.startswith("llvm.x86.avx512.mask.storeu.b.") || 631 Name.startswith("llvm.x86.avx512.mask.storeu.w.") || 632 Name.startswith("llvm.x86.avx512.mask.storeu.d.") || 633 Name.startswith("llvm.x86.avx512.mask.storeu.q.")) { 634 UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), 635 CI->getArgOperand(2), /*Aligned*/false); 636 637 // Remove intrinsic. 638 CI->eraseFromParent(); 639 return; 640 } else if (Name.startswith("llvm.x86.avx512.mask.store.p") || 641 Name.startswith("llvm.x86.avx512.mask.store.b.") || 642 Name.startswith("llvm.x86.avx512.mask.store.w.") || 643 Name.startswith("llvm.x86.avx512.mask.store.d.") || 644 Name.startswith("llvm.x86.avx512.mask.store.q.")) { 645 UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), 646 CI->getArgOperand(2), /*Aligned*/true); 647 648 // Remove intrinsic. 649 CI->eraseFromParent(); 650 return; 651 } else if (Name.startswith("llvm.x86.avx512.mask.loadu.p") || 652 Name.startswith("llvm.x86.avx512.mask.loadu.b.") || 653 Name.startswith("llvm.x86.avx512.mask.loadu.w.") || 654 Name.startswith("llvm.x86.avx512.mask.loadu.d.") || 655 Name.startswith("llvm.x86.avx512.mask.loadu.q.")) { 656 Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0), 657 CI->getArgOperand(1), CI->getArgOperand(2), 658 /*Aligned*/false); 659 } else if (Name.startswith("llvm.x86.avx512.mask.load.p") || 660 Name.startswith("llvm.x86.avx512.mask.load.b.") || 661 Name.startswith("llvm.x86.avx512.mask.load.w.") || 662 Name.startswith("llvm.x86.avx512.mask.load.d.") || 663 Name.startswith("llvm.x86.avx512.mask.load.q.")) { 664 Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0), 665 CI->getArgOperand(1),CI->getArgOperand(2), 666 /*Aligned*/true); 667 } else if (Name.startswith("llvm.x86.xop.vpcom")) { 668 Intrinsic::ID intID; 669 if (Name.endswith("ub")) 670 intID = Intrinsic::x86_xop_vpcomub; 671 else if (Name.endswith("uw")) 672 intID = Intrinsic::x86_xop_vpcomuw; 673 else if (Name.endswith("ud")) 674 intID = Intrinsic::x86_xop_vpcomud; 675 else if (Name.endswith("uq")) 676 intID = Intrinsic::x86_xop_vpcomuq; 677 else if (Name.endswith("b")) 678 intID = Intrinsic::x86_xop_vpcomb; 679 else if (Name.endswith("w")) 680 intID = Intrinsic::x86_xop_vpcomw; 681 else if (Name.endswith("d")) 682 intID = Intrinsic::x86_xop_vpcomd; 683 else if (Name.endswith("q")) 684 intID = Intrinsic::x86_xop_vpcomq; 685 else 686 llvm_unreachable("Unknown suffix"); 687 688 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" 689 unsigned Imm; 690 if (Name.startswith("lt")) 691 Imm = 0; 692 else if (Name.startswith("le")) 693 Imm = 1; 694 else if (Name.startswith("gt")) 695 Imm = 2; 696 else if (Name.startswith("ge")) 697 Imm = 3; 698 else if (Name.startswith("eq")) 699 Imm = 4; 700 else if (Name.startswith("ne")) 701 Imm = 5; 702 else if (Name.startswith("false")) 703 Imm = 6; 704 else if (Name.startswith("true")) 705 Imm = 7; 706 else 707 llvm_unreachable("Unknown condition"); 708 709 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 710 Rep = 711 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 712 Builder.getInt8(Imm)}); 713 } else if (Name == "llvm.x86.xop.vpcmov") { 714 Value *Arg0 = CI->getArgOperand(0); 715 Value *Arg1 = CI->getArgOperand(1); 716 Value *Sel = CI->getArgOperand(2); 717 unsigned NumElts = CI->getType()->getVectorNumElements(); 718 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1)); 719 Value *NotSel = Builder.CreateXor(Sel, MinusOne); 720 Value *Sel0 = Builder.CreateAnd(Arg0, Sel); 721 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel); 722 Rep = Builder.CreateOr(Sel0, Sel1); 723 } else if (Name == "llvm.x86.sse42.crc32.64.8") { 724 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 725 Intrinsic::x86_sse42_crc32_32_8); 726 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 727 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 728 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 729 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { 730 // Replace broadcasts with a series of insertelements. 731 Type *VecTy = CI->getType(); 732 Type *EltTy = VecTy->getVectorElementType(); 733 unsigned EltNum = VecTy->getVectorNumElements(); 734 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 735 EltTy->getPointerTo()); 736 Value *Load = Builder.CreateLoad(EltTy, Cast); 737 Type *I32Ty = Type::getInt32Ty(C); 738 Rep = UndefValue::get(VecTy); 739 for (unsigned I = 0; I < EltNum; ++I) 740 Rep = Builder.CreateInsertElement(Rep, Load, 741 ConstantInt::get(I32Ty, I)); 742 } else if (Name.startswith("llvm.x86.sse41.pmovsx") || 743 Name.startswith("llvm.x86.sse41.pmovzx") || 744 Name.startswith("llvm.x86.avx2.pmovsx") || 745 Name.startswith("llvm.x86.avx2.pmovzx")) { 746 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 747 VectorType *DstTy = cast<VectorType>(CI->getType()); 748 unsigned NumDstElts = DstTy->getNumElements(); 749 750 // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 751 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 752 for (unsigned i = 0; i != NumDstElts; ++i) 753 ShuffleMask[i] = i; 754 755 Value *SV = Builder.CreateShuffleVector( 756 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 757 758 bool DoSext = (StringRef::npos != Name.find("pmovsx")); 759 Rep = DoSext ? Builder.CreateSExt(SV, DstTy) 760 : Builder.CreateZExt(SV, DstTy); 761 } else if (Name == "llvm.x86.avx2.vbroadcasti128") { 762 // Replace vbroadcasts with a vector shuffle. 763 Type *VT = VectorType::get(Type::getInt64Ty(C), 2); 764 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 765 PointerType::getUnqual(VT)); 766 Value *Load = Builder.CreateLoad(VT, Op); 767 uint32_t Idxs[4] = { 0, 1, 0, 1 }; 768 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 769 Idxs); 770 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") || 771 Name.startswith("llvm.x86.avx2.vbroadcast")) { 772 // Replace vp?broadcasts with a vector shuffle. 773 Value *Op = CI->getArgOperand(0); 774 unsigned NumElts = CI->getType()->getVectorNumElements(); 775 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 776 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 777 Constant::getNullValue(MaskTy)); 778 } else if (Name.startswith("llvm.x86.avx512.mask.palignr.")) { 779 Rep = UpgradeX86PALIGNRIntrinsics(Builder, C, CI->getArgOperand(0), 780 CI->getArgOperand(1), 781 CI->getArgOperand(2), 782 CI->getArgOperand(3), 783 CI->getArgOperand(4)); 784 } else if (Name == "llvm.x86.sse2.psll.dq" || 785 Name == "llvm.x86.avx2.psll.dq") { 786 // 128/256-bit shift left specified in bits. 787 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 788 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 789 Shift / 8); // Shift is in bits. 790 } else if (Name == "llvm.x86.sse2.psrl.dq" || 791 Name == "llvm.x86.avx2.psrl.dq") { 792 // 128/256-bit shift right specified in bits. 793 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 794 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 795 Shift / 8); // Shift is in bits. 796 } else if (Name == "llvm.x86.sse2.psll.dq.bs" || 797 Name == "llvm.x86.avx2.psll.dq.bs" || 798 Name == "llvm.x86.avx512.psll.dq.512") { 799 // 128/256/512-bit shift left specified in bytes. 800 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 801 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift); 802 } else if (Name == "llvm.x86.sse2.psrl.dq.bs" || 803 Name == "llvm.x86.avx2.psrl.dq.bs" || 804 Name == "llvm.x86.avx512.psrl.dq.512") { 805 // 128/256/512-bit shift right specified in bytes. 806 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 807 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift); 808 } else if (Name == "llvm.x86.sse41.pblendw" || 809 Name.startswith("llvm.x86.sse41.blendp") || 810 Name.startswith("llvm.x86.avx.blend.p") || 811 Name == "llvm.x86.avx2.pblendw" || 812 Name.startswith("llvm.x86.avx2.pblendd.")) { 813 Value *Op0 = CI->getArgOperand(0); 814 Value *Op1 = CI->getArgOperand(1); 815 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 816 VectorType *VecTy = cast<VectorType>(CI->getType()); 817 unsigned NumElts = VecTy->getNumElements(); 818 819 SmallVector<uint32_t, 16> Idxs(NumElts); 820 for (unsigned i = 0; i != NumElts; ++i) 821 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 822 823 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 824 } else if (Name.startswith("llvm.x86.avx.vinsertf128.") || 825 Name == "llvm.x86.avx2.vinserti128") { 826 Value *Op0 = CI->getArgOperand(0); 827 Value *Op1 = CI->getArgOperand(1); 828 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 829 VectorType *VecTy = cast<VectorType>(CI->getType()); 830 unsigned NumElts = VecTy->getNumElements(); 831 832 // Mask off the high bits of the immediate value; hardware ignores those. 833 Imm = Imm & 1; 834 835 // Extend the second operand into a vector that is twice as big. 836 Value *UndefV = UndefValue::get(Op1->getType()); 837 SmallVector<uint32_t, 8> Idxs(NumElts); 838 for (unsigned i = 0; i != NumElts; ++i) 839 Idxs[i] = i; 840 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); 841 842 // Insert the second operand into the first operand. 843 844 // Note that there is no guarantee that instruction lowering will actually 845 // produce a vinsertf128 instruction for the created shuffles. In 846 // particular, the 0 immediate case involves no lane changes, so it can 847 // be handled as a blend. 848 849 // Example of shuffle mask for 32-bit elements: 850 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 851 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 852 853 // The low half of the result is either the low half of the 1st operand 854 // or the low half of the 2nd operand (the inserted vector). 855 for (unsigned i = 0; i != NumElts / 2; ++i) 856 Idxs[i] = Imm ? i : (i + NumElts); 857 // The high half of the result is either the low half of the 2nd operand 858 // (the inserted vector) or the high half of the 1st operand. 859 for (unsigned i = NumElts / 2; i != NumElts; ++i) 860 Idxs[i] = Imm ? (i + NumElts / 2) : i; 861 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); 862 } else if (Name.startswith("llvm.x86.avx.vextractf128.") || 863 Name == "llvm.x86.avx2.vextracti128") { 864 Value *Op0 = CI->getArgOperand(0); 865 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 866 VectorType *VecTy = cast<VectorType>(CI->getType()); 867 unsigned NumElts = VecTy->getNumElements(); 868 869 // Mask off the high bits of the immediate value; hardware ignores those. 870 Imm = Imm & 1; 871 872 // Get indexes for either the high half or low half of the input vector. 873 SmallVector<uint32_t, 4> Idxs(NumElts); 874 for (unsigned i = 0; i != NumElts; ++i) { 875 Idxs[i] = Imm ? (i + NumElts) : i; 876 } 877 878 Value *UndefV = UndefValue::get(Op0->getType()); 879 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); 880 } else if (Name == "llvm.stackprotectorcheck") { 881 Rep = nullptr; 882 } else if (Name.startswith("llvm.x86.avx.vpermil.") || 883 Name == "llvm.x86.sse2.pshuf.d" || 884 Name.startswith("llvm.x86.avx512.mask.pshuf.d.")) { 885 Value *Op0 = CI->getArgOperand(0); 886 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 887 VectorType *VecTy = cast<VectorType>(CI->getType()); 888 unsigned NumElts = VecTy->getNumElements(); 889 // Calcuate the size of each index in the immediate. 890 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 891 unsigned IdxMask = ((1 << IdxSize) - 1); 892 893 SmallVector<uint32_t, 8> Idxs(NumElts); 894 // Lookup the bits for this element, wrapping around the immediate every 895 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need 896 // to offset by the first index of each group. 897 for (unsigned i = 0; i != NumElts; ++i) 898 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 899 900 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 901 902 if (CI->getNumArgOperands() == 4) 903 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 904 CI->getArgOperand(2)); 905 } else if (Name == "llvm.x86.sse2.pshufl.w" || 906 Name.startswith("llvm.x86.avx512.mask.pshufl.w.")) { 907 Value *Op0 = CI->getArgOperand(0); 908 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 909 unsigned NumElts = CI->getType()->getVectorNumElements(); 910 911 SmallVector<uint32_t, 16> Idxs(NumElts); 912 for (unsigned l = 0; l != NumElts; l += 8) { 913 for (unsigned i = 0; i != 4; ++i) 914 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 915 for (unsigned i = 4; i != 8; ++i) 916 Idxs[i + l] = i + l; 917 } 918 919 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 920 921 if (CI->getNumArgOperands() == 4) 922 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 923 CI->getArgOperand(2)); 924 } else if (Name == "llvm.x86.sse2.pshufh.w" || 925 Name.startswith("llvm.x86.avx512.mask.pshufh.w.")) { 926 Value *Op0 = CI->getArgOperand(0); 927 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 928 unsigned NumElts = CI->getType()->getVectorNumElements(); 929 930 SmallVector<uint32_t, 16> Idxs(NumElts); 931 for (unsigned l = 0; l != NumElts; l += 8) { 932 for (unsigned i = 0; i != 4; ++i) 933 Idxs[i + l] = i + l; 934 for (unsigned i = 0; i != 4; ++i) 935 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 936 } 937 938 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 939 940 if (CI->getNumArgOperands() == 4) 941 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 942 CI->getArgOperand(2)); 943 } else { 944 llvm_unreachable("Unknown function for CallInst upgrade."); 945 } 946 947 if (Rep) 948 CI->replaceAllUsesWith(Rep); 949 CI->eraseFromParent(); 950 return; 951 } 952 953 std::string Name = CI->getName(); 954 if (!Name.empty()) 955 CI->setName(Name + ".old"); 956 957 switch (NewFn->getIntrinsicID()) { 958 default: 959 llvm_unreachable("Unknown function for CallInst upgrade."); 960 961 case Intrinsic::arm_neon_vld1: 962 case Intrinsic::arm_neon_vld2: 963 case Intrinsic::arm_neon_vld3: 964 case Intrinsic::arm_neon_vld4: 965 case Intrinsic::arm_neon_vld2lane: 966 case Intrinsic::arm_neon_vld3lane: 967 case Intrinsic::arm_neon_vld4lane: 968 case Intrinsic::arm_neon_vst1: 969 case Intrinsic::arm_neon_vst2: 970 case Intrinsic::arm_neon_vst3: 971 case Intrinsic::arm_neon_vst4: 972 case Intrinsic::arm_neon_vst2lane: 973 case Intrinsic::arm_neon_vst3lane: 974 case Intrinsic::arm_neon_vst4lane: { 975 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 976 CI->arg_operands().end()); 977 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 978 CI->eraseFromParent(); 979 return; 980 } 981 982 case Intrinsic::ctlz: 983 case Intrinsic::cttz: 984 assert(CI->getNumArgOperands() == 1 && 985 "Mismatch between function args and call args"); 986 CI->replaceAllUsesWith(Builder.CreateCall( 987 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name)); 988 CI->eraseFromParent(); 989 return; 990 991 case Intrinsic::objectsize: 992 CI->replaceAllUsesWith(Builder.CreateCall( 993 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); 994 CI->eraseFromParent(); 995 return; 996 997 case Intrinsic::ctpop: { 998 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); 999 CI->eraseFromParent(); 1000 return; 1001 } 1002 1003 case Intrinsic::x86_xop_vfrcz_ss: 1004 case Intrinsic::x86_xop_vfrcz_sd: 1005 CI->replaceAllUsesWith( 1006 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name)); 1007 CI->eraseFromParent(); 1008 return; 1009 1010 case Intrinsic::x86_xop_vpermil2pd: 1011 case Intrinsic::x86_xop_vpermil2ps: 1012 case Intrinsic::x86_xop_vpermil2pd_256: 1013 case Intrinsic::x86_xop_vpermil2ps_256: { 1014 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1015 CI->arg_operands().end()); 1016 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 1017 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 1018 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 1019 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name)); 1020 CI->eraseFromParent(); 1021 return; 1022 } 1023 1024 case Intrinsic::x86_sse41_ptestc: 1025 case Intrinsic::x86_sse41_ptestz: 1026 case Intrinsic::x86_sse41_ptestnzc: { 1027 // The arguments for these intrinsics used to be v4f32, and changed 1028 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 1029 // So, the only thing required is a bitcast for both arguments. 1030 // First, check the arguments have the old type. 1031 Value *Arg0 = CI->getArgOperand(0); 1032 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 1033 return; 1034 1035 // Old intrinsic, add bitcasts 1036 Value *Arg1 = CI->getArgOperand(1); 1037 1038 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 1039 1040 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 1041 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 1042 1043 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name); 1044 CI->replaceAllUsesWith(NewCall); 1045 CI->eraseFromParent(); 1046 return; 1047 } 1048 1049 case Intrinsic::x86_sse41_insertps: 1050 case Intrinsic::x86_sse41_dppd: 1051 case Intrinsic::x86_sse41_dpps: 1052 case Intrinsic::x86_sse41_mpsadbw: 1053 case Intrinsic::x86_avx_dp_ps_256: 1054 case Intrinsic::x86_avx2_mpsadbw: { 1055 // Need to truncate the last argument from i32 to i8 -- this argument models 1056 // an inherently 8-bit immediate operand to these x86 instructions. 1057 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1058 CI->arg_operands().end()); 1059 1060 // Replace the last argument with a trunc. 1061 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 1062 1063 CallInst *NewCall = Builder.CreateCall(NewFn, Args); 1064 CI->replaceAllUsesWith(NewCall); 1065 CI->eraseFromParent(); 1066 return; 1067 } 1068 1069 case Intrinsic::thread_pointer: { 1070 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {})); 1071 CI->eraseFromParent(); 1072 return; 1073 } 1074 } 1075 } 1076 1077 void llvm::UpgradeCallsToIntrinsic(Function *F) { 1078 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 1079 1080 // Check if this function should be upgraded and get the replacement function 1081 // if there is one. 1082 Function *NewFn; 1083 if (UpgradeIntrinsicFunction(F, NewFn)) { 1084 // Replace all users of the old function with the new function or new 1085 // instructions. This is not a range loop because the call is deleted. 1086 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 1087 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 1088 UpgradeIntrinsicCall(CI, NewFn); 1089 1090 // Remove old function, no longer used, from the module. 1091 F->eraseFromParent(); 1092 } 1093 } 1094 1095 void llvm::UpgradeInstWithTBAATag(Instruction *I) { 1096 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); 1097 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); 1098 // Check if the tag uses struct-path aware TBAA format. 1099 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3) 1100 return; 1101 1102 if (MD->getNumOperands() == 3) { 1103 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)}; 1104 MDNode *ScalarType = MDNode::get(I->getContext(), Elts); 1105 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 1106 Metadata *Elts2[] = {ScalarType, ScalarType, 1107 ConstantAsMetadata::get(Constant::getNullValue( 1108 Type::getInt64Ty(I->getContext()))), 1109 MD->getOperand(2)}; 1110 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); 1111 } else { 1112 // Create a MDNode <MD, MD, offset 0> 1113 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue( 1114 Type::getInt64Ty(I->getContext())))}; 1115 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); 1116 } 1117 } 1118 1119 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 1120 Instruction *&Temp) { 1121 if (Opc != Instruction::BitCast) 1122 return nullptr; 1123 1124 Temp = nullptr; 1125 Type *SrcTy = V->getType(); 1126 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 1127 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 1128 LLVMContext &Context = V->getContext(); 1129 1130 // We have no information about target data layout, so we assume that 1131 // the maximum pointer size is 64bit. 1132 Type *MidTy = Type::getInt64Ty(Context); 1133 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 1134 1135 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 1136 } 1137 1138 return nullptr; 1139 } 1140 1141 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 1142 if (Opc != Instruction::BitCast) 1143 return nullptr; 1144 1145 Type *SrcTy = C->getType(); 1146 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 1147 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 1148 LLVMContext &Context = C->getContext(); 1149 1150 // We have no information about target data layout, so we assume that 1151 // the maximum pointer size is 64bit. 1152 Type *MidTy = Type::getInt64Ty(Context); 1153 1154 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 1155 DestTy); 1156 } 1157 1158 return nullptr; 1159 } 1160 1161 /// Check the debug info version number, if it is out-dated, drop the debug 1162 /// info. Return true if module is modified. 1163 bool llvm::UpgradeDebugInfo(Module &M) { 1164 unsigned Version = getDebugMetadataVersionFromModule(M); 1165 if (Version == DEBUG_METADATA_VERSION) 1166 return false; 1167 1168 bool RetCode = StripDebugInfo(M); 1169 if (RetCode) { 1170 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 1171 M.getContext().diagnose(DiagVersion); 1172 } 1173 return RetCode; 1174 } 1175 1176 bool llvm::UpgradeModuleFlags(Module &M) { 1177 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 1178 if (!ModFlags) 1179 return false; 1180 1181 bool HasObjCFlag = false, HasClassProperties = false; 1182 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { 1183 MDNode *Op = ModFlags->getOperand(I); 1184 if (Op->getNumOperands() < 2) 1185 continue; 1186 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); 1187 if (!ID) 1188 continue; 1189 if (ID->getString() == "Objective-C Image Info Version") 1190 HasObjCFlag = true; 1191 if (ID->getString() == "Objective-C Class Properties") 1192 HasClassProperties = true; 1193 } 1194 // "Objective-C Class Properties" is recently added for Objective-C. We 1195 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 1196 // flag of value 0, so we can correclty report error when trying to link 1197 // an ObjC bitcode without this module flag with an ObjC bitcode with this 1198 // module flag. 1199 if (HasObjCFlag && !HasClassProperties) { 1200 M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", 1201 (uint32_t)0); 1202 return true; 1203 } 1204 return false; 1205 } 1206 1207 static bool isOldLoopArgument(Metadata *MD) { 1208 auto *T = dyn_cast_or_null<MDTuple>(MD); 1209 if (!T) 1210 return false; 1211 if (T->getNumOperands() < 1) 1212 return false; 1213 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 1214 if (!S) 1215 return false; 1216 return S->getString().startswith("llvm.vectorizer."); 1217 } 1218 1219 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 1220 StringRef OldPrefix = "llvm.vectorizer."; 1221 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 1222 1223 if (OldTag == "llvm.vectorizer.unroll") 1224 return MDString::get(C, "llvm.loop.interleave.count"); 1225 1226 return MDString::get( 1227 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 1228 .str()); 1229 } 1230 1231 static Metadata *upgradeLoopArgument(Metadata *MD) { 1232 auto *T = dyn_cast_or_null<MDTuple>(MD); 1233 if (!T) 1234 return MD; 1235 if (T->getNumOperands() < 1) 1236 return MD; 1237 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 1238 if (!OldTag) 1239 return MD; 1240 if (!OldTag->getString().startswith("llvm.vectorizer.")) 1241 return MD; 1242 1243 // This has an old tag. Upgrade it. 1244 SmallVector<Metadata *, 8> Ops; 1245 Ops.reserve(T->getNumOperands()); 1246 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 1247 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 1248 Ops.push_back(T->getOperand(I)); 1249 1250 return MDTuple::get(T->getContext(), Ops); 1251 } 1252 1253 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 1254 auto *T = dyn_cast<MDTuple>(&N); 1255 if (!T) 1256 return &N; 1257 1258 if (!llvm::any_of(T->operands(), isOldLoopArgument)) 1259 return &N; 1260 1261 SmallVector<Metadata *, 8> Ops; 1262 Ops.reserve(T->getNumOperands()); 1263 for (Metadata *MD : T->operands()) 1264 Ops.push_back(upgradeLoopArgument(MD)); 1265 1266 return MDTuple::get(T->getContext(), Ops); 1267 } 1268