//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
                                 Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
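
// Illustrative example (assumed IR, for exposition only): an old declaration
// such as
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>,
//                                                i32)
// is renamed to "llvm.x86.sse41.insertps.old" and redeclared with an i8
// immediate; the matching call sites are rewritten in UpgradeIntrinsicCall
// below by truncating the i32 operand to i8.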

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5],
                                          Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
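
  // For example, a one-argument declaration
  //   declare i32 @llvm.ctlz.i32(i32)
  // picks up the new i1 is-zero-undef operand here; the call-site upgrade
  // below passes false for it, preserving the old defined-at-zero semantics.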

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        F->setName(Name + ".old");
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x': {
    if (Name.startswith("x86.sse2.pcmpeq.") ||
        Name.startswith("x86.sse2.pcmpgt.") ||
        Name.startswith("x86.avx2.pcmpeq.") ||
        Name.startswith("x86.avx2.pcmpgt.") ||
        Name.startswith("x86.avx2.vbroadcast") ||
        Name.startswith("x86.avx2.pbroadcast") ||
        Name.startswith("x86.avx.vpermil.") ||
        Name.startswith("x86.sse41.pmovsx") ||
        Name.startswith("x86.sse41.pmovzx") ||
        Name.startswith("x86.avx2.pmovsx") ||
        Name.startswith("x86.avx2.pmovzx") ||
        Name == "x86.sse2.cvtdq2pd" ||
        Name == "x86.sse2.cvtps2pd" ||
        Name == "x86.avx.cvtdq2.pd.256" ||
        Name == "x86.avx.cvt.ps2.pd.256" ||
        Name == "x86.sse2.cvttps2dq" ||
        Name.startswith("x86.avx.cvtt.") ||
        Name.startswith("x86.avx.vinsertf128.") ||
        Name == "x86.avx2.vinserti128" ||
        Name.startswith("x86.avx.vextractf128.") ||
        Name == "x86.avx2.vextracti128" ||
        Name.startswith("x86.avx.movnt.") ||
        Name == "x86.sse2.storel.dq" ||
        Name.startswith("x86.sse.storeu.") ||
        Name.startswith("x86.sse2.storeu.") ||
        Name.startswith("x86.avx.storeu.") ||
        Name.startswith("x86.avx512.mask.storeu.p") ||
        Name.startswith("x86.avx512.mask.storeu.b.") ||
        Name.startswith("x86.avx512.mask.storeu.w.") ||
        Name.startswith("x86.avx512.mask.storeu.d.") ||
        Name.startswith("x86.avx512.mask.storeu.q.") ||
        Name.startswith("x86.avx512.mask.store.p") ||
        Name.startswith("x86.avx512.mask.store.b.") ||
        Name.startswith("x86.avx512.mask.store.w.") ||
        Name.startswith("x86.avx512.mask.store.d.") ||
        Name.startswith("x86.avx512.mask.store.q.") ||
        Name.startswith("x86.avx512.mask.loadu.p") ||
        Name.startswith("x86.avx512.mask.loadu.b.") ||
        Name.startswith("x86.avx512.mask.loadu.w.") ||
        Name.startswith("x86.avx512.mask.loadu.d.") ||
        Name.startswith("x86.avx512.mask.loadu.q.") ||
        Name.startswith("x86.avx512.mask.load.p") ||
        Name.startswith("x86.avx512.mask.load.b.") ||
        Name.startswith("x86.avx512.mask.load.w.") ||
        Name.startswith("x86.avx512.mask.load.d.") ||
        Name.startswith("x86.avx512.mask.load.q.") ||
        Name == "x86.sse42.crc32.64.8" ||
        Name.startswith("x86.avx.vbroadcast.s") ||
        Name.startswith("x86.avx512.mask.palignr.") ||
        Name.startswith("x86.sse2.psll.dq") ||
        Name.startswith("x86.sse2.psrl.dq") ||
        Name.startswith("x86.avx2.psll.dq") ||
        Name.startswith("x86.avx2.psrl.dq") ||
        Name == "x86.sse41.pblendw" ||
        Name.startswith("x86.sse41.blendp") ||
        Name.startswith("x86.avx.blend.p") ||
        Name == "x86.avx2.pblendw" ||
        Name.startswith("x86.avx2.pblendd.") ||
        Name == "x86.avx2.vbroadcasti128" ||
        Name == "x86.xop.vpcmov" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
    }
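    // Returning true while leaving NewFn null tells UpgradeIntrinsicCall
    // that there is no replacement declaration; those calls are expanded
    // inline with plain IR instead.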

    // SSE4.1 ptest functions may have an old signature.
    if (Name.startswith("x86.sse41.ptest")) {
      if (Name == "x86.sse41.ptestc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name == "x86.sse41.ptestz")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name == "x86.sse41.ptestnzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (Name == "x86.sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (Name == "x86.sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (Name == "x86.sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (Name == "x86.sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (Name == "x86.avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (Name == "x86.avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped
    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4
    if (Name.startswith("x86.fma4.")) {
      F->setName("llvm.x86.fma" + Name.substr(8));
      NewFn = F;
      return true;
    }
    // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
    if (Name.startswith("x86.xop.vpermil2")) {
      auto Params = F->getFunctionType()->params();
      auto Idx = Params[2];
      if (Idx->getScalarType()->isFloatingPointTy()) {
        F->setName(Name + ".old");
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        Intrinsic::ID Permil2ID;
        if (EltSize == 64 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
        NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
        return true;
      }
    }
    break;
  }
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[32];
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                          Value *Op0, Value *Op1, Value *Shift,
                                          Value *Passthru, Value *Mask) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert(NumElts % 16 == 0);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l != NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= 16)
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // If the mask is all ones just emit the align operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Align;

  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                                                   NumElts);
  Mask = Builder.CreateBitCast(Mask, MaskTy, "cast");
  return Builder.CreateSelect(Mask, Align, Passthru);
}
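
// Illustrative IR (for exposition only): with a byte shift of 4, the call
//   %r = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a, i32 4)
// becomes a bitcast of %a to <16 x i8>, a shufflevector that pulls four zero
// bytes into the low end of the lane, and a bitcast back to <2 x i64>.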

// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[32];
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  llvm::VectorType *MaskTy = llvm::VectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
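
// Illustrative example (assumed IR, for exposition only): a masked store like
//   call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %p, <16 x float> %v,
//                                                 i16 %m)
// becomes a bitcast of %m to <16 x i1> feeding @llvm.masked.store, with
// alignment 1 for the unaligned (storeu) forms and the full vector width for
// the aligned (store) forms.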

static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  llvm::VectorType *MaskTy = llvm::VectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
// the upgraded intrinsic. All argument and return casting must be provided
// in order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
               Name == "llvm.x86.sse2.cvtps2pd" ||
               Name == "llvm.x86.avx.cvtdq2.pd.256" ||
               Name == "llvm.x86.avx.cvt.ps2.pd.256") {
      // Lossless i32/float to double conversion.
      // Extract the bottom elements if necessary and convert to double vector.
      Value *Src = CI->getArgOperand(0);
      VectorType *SrcTy = cast<VectorType>(Src->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = Src;

      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        const int ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
                                          ShuffleMask);
      }

      bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
      if (Int2Double)
        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
      else
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    } else if (Name == "llvm.x86.sse2.cvttps2dq" ||
               Name.startswith("llvm.x86.avx.cvtt.")) {
      // Truncation (round to zero) float/double to i32 vector conversion.
      Value *Src = CI->getArgOperand(0);
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
    } else if (Name.startswith("llvm.x86.avx.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);
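      // The !nontemporal metadata format expects a single i32 of value 1;
      // attaching this node to the plain store below lets the backend still
      // emit non-temporal (MOVNT) stores.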
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 32);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name == "llvm.x86.sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.sse.storeu.") ||
               Name.startswith("llvm.x86.sse2.storeu.") ||
               Name.startswith("llvm.x86.avx.storeu.")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.b.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.w.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.d.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.q.")) {
      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.store.p") ||
               Name.startswith("llvm.x86.avx512.mask.store.b.") ||
               Name.startswith("llvm.x86.avx512.mask.store.w.") ||
               Name.startswith("llvm.x86.avx512.mask.store.d.") ||
               Name.startswith("llvm.x86.avx512.mask.store.q.")) {
      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/true);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.loadu.p") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.b.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.w.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.d.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.q.")) {
      Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (Name.startswith("llvm.x86.avx512.mask.load.p") ||
               Name.startswith("llvm.x86.avx512.mask.load.b.") ||
               Name.startswith("llvm.x86.avx512.mask.load.w.") ||
               Name.startswith("llvm.x86.avx512.mask.load.d.") ||
               Name.startswith("llvm.x86.avx512.mask.load.q.")) {
      Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.xop.vpcmov") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Sel = CI->getArgOperand(2);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Constant *MinusOne = ConstantVector::getSplat(NumElts,
                                                    Builder.getInt64(-1));
      Value *NotSel = Builder.CreateXor(Sel, MinusOne);
      Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
      Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0),
                                          Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
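      // The old form took a pointer operand: load the scalar once, then
      // build the splat with one insertelement per lane.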
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name.startswith("llvm.x86.sse41.pmovsx") ||
               Name.startswith("llvm.x86.sse41.pmovzx") ||
               Name.startswith("llvm.x86.avx2.pmovsx") ||
               Name.startswith("llvm.x86.avx2.pmovzx")) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<int, 8> ShuffleMask;
      for (int i = 0; i != (int)NumDstElts; ++i)
        ShuffleMask.push_back(i);

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      const int Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                        Idxs);
    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
               Name.startswith("llvm.x86.avx2.vbroadcast")) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));
    } else if (Name.startswith("llvm.x86.avx512.mask.palignr.")) {
      Rep = UpgradeX86PALIGNRIntrinsics(Builder, C, CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        CI->getArgOperand(2),
                                        CI->getArgOperand(3),
                                        CI->getArgOperand(4));
    } else if (Name == "llvm.x86.sse2.psll.dq" ||
               Name == "llvm.x86.avx2.psll.dq") {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq" ||
               Name == "llvm.x86.avx2.psrl.dq") {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs" ||
               Name == "llvm.x86.avx2.psll.dq.bs") {
      // 128/256-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift);
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs" ||
               Name == "llvm.x86.avx2.psrl.dq.bs") {
      // 128/256-bit shift right specified in bytes.
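      // The .bs variants already take the shift amount in bytes, so it is
      // passed through without the /8 scaling used above.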
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift);
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name.startswith("llvm.x86.sse41.blendp") ||
               Name.startswith("llvm.x86.avx.blend.p") ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name.startswith("llvm.x86.avx2.pblendd.")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<Constant*, 16> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
        Idxs.push_back(Builder.getInt32(Idx));
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
    } else if (Name.startswith("llvm.x86.avx.vinsertf128.") ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant*, 8> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs.push_back(Builder.getInt32(i));
      }
      Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      SmallVector<Constant*, 8> Idxs2;
      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i) {
        unsigned Idx = Imm ? i : (i + NumElts);
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts / 2) : i;
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
    } else if (Name.startswith("llvm.x86.avx.vextractf128.") ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
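      // For example, extracting from an <8 x float> source with Imm = 1
      // produces the mask <4, 5, 6, 7>, i.e. the high 128-bit half.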
      SmallVector<Constant*, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts) : i;
        Idxs[i] = Builder.getInt32(Idx);
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    } else if (Name == "llvm.stackprotectorcheck") {
      Rep = nullptr;
    } else {
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
        PD256 = true;
      else if (Name == "llvm.x86.avx.vpermil.pd")
        PD128 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps.256")
        PS256 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps")
        PS128 = true;

      if (PD256 || PD128 || PS256 || PS128) {
        Value *Op0 = CI->getArgOperand(0);
        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
        SmallVector<Constant*, 8> Idxs;

        if (PD128)
          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
        else if (PD256)
          for (unsigned l = 0; l != 4; l+=2)
            for (unsigned i = 0; i != 2; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
        else if (PS128)
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
        else if (PS256)
          for (unsigned l = 0; l != 8; l+=4)
            for (unsigned i = 0; i != 4; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
        else
          llvm_unreachable("Unexpected function");

        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      } else {
        llvm_unreachable("Unknown function for CallInst upgrade.");
      }
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }
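
  // The old frcz.ss/sd intrinsics took two operands but only used the second;
  // the upgraded call drops the redundant first operand.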
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::thread_pointer: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
    CI->eraseFromParent();
    return;
  }
  }
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}
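
// Illustrative example (for exposition only): an old scalar TBAA tag such as
//   !0 = !{!"int", !1}
// is reused as the type node of a struct-path access tag !{!0, !0, i64 0};
// a trailing i1 constant flag, if present, is carried over.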
void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
    return;

  if (MD->getNumOperands() == 3) {
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create an MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
                         MD->getOperand(2)};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
  } else {
    // Create an MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
  }
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64 bits.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}
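
// A minimal sketch (assumed IR, for exposition only): the illegal cast
//   bitcast i8* %p to i8 addrspace(1)*
// is rewritten by the helpers above as a ptrtoint to i64 followed by an
// inttoptr, since bitcast may not change address spaces.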
We 1174 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 1175 // flag of value 0, so we can correclty report error when trying to link 1176 // an ObjC bitcode without this module flag with an ObjC bitcode with this 1177 // module flag. 1178 if (HasObjCFlag && !HasClassProperties) { 1179 M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", 1180 (uint32_t)0); 1181 return true; 1182 } 1183 return false; 1184 } 1185 1186 static bool isOldLoopArgument(Metadata *MD) { 1187 auto *T = dyn_cast_or_null<MDTuple>(MD); 1188 if (!T) 1189 return false; 1190 if (T->getNumOperands() < 1) 1191 return false; 1192 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 1193 if (!S) 1194 return false; 1195 return S->getString().startswith("llvm.vectorizer."); 1196 } 1197 1198 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 1199 StringRef OldPrefix = "llvm.vectorizer."; 1200 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 1201 1202 if (OldTag == "llvm.vectorizer.unroll") 1203 return MDString::get(C, "llvm.loop.interleave.count"); 1204 1205 return MDString::get( 1206 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 1207 .str()); 1208 } 1209 1210 static Metadata *upgradeLoopArgument(Metadata *MD) { 1211 auto *T = dyn_cast_or_null<MDTuple>(MD); 1212 if (!T) 1213 return MD; 1214 if (T->getNumOperands() < 1) 1215 return MD; 1216 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 1217 if (!OldTag) 1218 return MD; 1219 if (!OldTag->getString().startswith("llvm.vectorizer.")) 1220 return MD; 1221 1222 // This has an old tag. Upgrade it. 1223 SmallVector<Metadata *, 8> Ops; 1224 Ops.reserve(T->getNumOperands()); 1225 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 1226 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 1227 Ops.push_back(T->getOperand(I)); 1228 1229 return MDTuple::get(T->getContext(), Ops); 1230 } 1231 1232 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 1233 auto *T = dyn_cast<MDTuple>(&N); 1234 if (!T) 1235 return &N; 1236 1237 if (!llvm::any_of(T->operands(), isOldLoopArgument)) 1238 return &N; 1239 1240 SmallVector<Metadata *, 8> Ops; 1241 Ops.reserve(T->getNumOperands()); 1242 for (Metadata *MD : T->operands()) 1243 Ops.push_back(upgradeLoopArgument(MD)); 1244 1245 return MDTuple::get(T->getContext(), Ops); 1246 } 1247