//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;

// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradeSSE41Function(Function *F, Intrinsic::ID IID,
                                 Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
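
// Determine whether F is a deprecated intrinsic declaration that needs
// upgrading. Returns true if so; NewFn then holds the replacement declaration,
// or stays null when the calls themselves must be rewritten in place.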
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type *args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType *fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType *fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        F->setName(Name + ".old");
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }

  case 'x': {
    if (Name.startswith("x86.sse2.pcmpeq.") ||
        Name.startswith("x86.sse2.pcmpgt.") ||
        Name.startswith("x86.avx2.pcmpeq.") ||
        Name.startswith("x86.avx2.pcmpgt.") ||
        Name.startswith("x86.avx512.mask.pcmpeq.") ||
        Name.startswith("x86.avx512.mask.pcmpgt.") ||
        Name == "x86.sse41.pmaxsb" ||
        Name == "x86.sse2.pmaxs.w" ||
        Name == "x86.sse41.pmaxsd" ||
        Name == "x86.sse2.pmaxu.b" ||
        Name == "x86.sse41.pmaxuw" ||
        Name == "x86.sse41.pmaxud" ||
        Name == "x86.sse41.pminsb" ||
        Name == "x86.sse2.pmins.w" ||
        Name == "x86.sse41.pminsd" ||
        Name == "x86.sse2.pminu.b" ||
        Name == "x86.sse41.pminuw" ||
        Name == "x86.sse41.pminud" ||
        Name.startswith("x86.avx2.pmax") ||
        Name.startswith("x86.avx2.pmin") ||
        Name.startswith("x86.avx2.vbroadcast") ||
        Name.startswith("x86.avx2.pbroadcast") ||
        Name.startswith("x86.avx.vpermil.") ||
        Name.startswith("x86.sse2.pshuf") ||
        Name.startswith("x86.avx512.mask.pshuf.d.") ||
        Name.startswith("x86.avx512.mask.pshufl.w.") ||
        Name.startswith("x86.avx512.mask.pshufh.w.") ||
        Name.startswith("x86.sse41.pmovsx") ||
        Name.startswith("x86.sse41.pmovzx") ||
        Name.startswith("x86.avx2.pmovsx") ||
        Name.startswith("x86.avx2.pmovzx") ||
        Name == "x86.sse2.cvtdq2pd" ||
        Name == "x86.sse2.cvtps2pd" ||
        Name == "x86.avx.cvtdq2.pd.256" ||
        Name == "x86.avx.cvt.ps2.pd.256" ||
        Name == "x86.sse2.cvttps2dq" ||
        Name.startswith("x86.avx.cvtt.") ||
        Name.startswith("x86.avx.vinsertf128.") ||
        Name == "x86.avx2.vinserti128" ||
        Name.startswith("x86.avx.vextractf128.") ||
        Name == "x86.avx2.vextracti128" ||
        Name.startswith("x86.sse4a.movnt.") ||
        Name.startswith("x86.avx.movnt.") ||
        Name == "x86.sse2.storel.dq" ||
        Name.startswith("x86.sse.storeu.") ||
        Name.startswith("x86.sse2.storeu.") ||
        Name.startswith("x86.avx.storeu.") ||
        Name.startswith("x86.avx512.mask.storeu.p") ||
        Name.startswith("x86.avx512.mask.storeu.b.") ||
        Name.startswith("x86.avx512.mask.storeu.w.") ||
        Name.startswith("x86.avx512.mask.storeu.d.") ||
        Name.startswith("x86.avx512.mask.storeu.q.") ||
        Name.startswith("x86.avx512.mask.store.p") ||
        Name.startswith("x86.avx512.mask.store.b.") ||
        Name.startswith("x86.avx512.mask.store.w.") ||
        Name.startswith("x86.avx512.mask.store.d.") ||
        Name.startswith("x86.avx512.mask.store.q.") ||
        Name.startswith("x86.avx512.mask.loadu.p") ||
        Name.startswith("x86.avx512.mask.loadu.b.") ||
        Name.startswith("x86.avx512.mask.loadu.w.") ||
        Name.startswith("x86.avx512.mask.loadu.d.") ||
        Name.startswith("x86.avx512.mask.loadu.q.") ||
        Name.startswith("x86.avx512.mask.load.p") ||
        Name.startswith("x86.avx512.mask.load.b.") ||
        Name.startswith("x86.avx512.mask.load.w.") ||
        Name.startswith("x86.avx512.mask.load.d.") ||
        Name.startswith("x86.avx512.mask.load.q.") ||
        Name == "x86.sse42.crc32.64.8" ||
        Name.startswith("x86.avx.vbroadcast.s") ||
        Name.startswith("x86.avx512.mask.palignr.") ||
        Name.startswith("x86.sse2.psll.dq") ||
        Name.startswith("x86.sse2.psrl.dq") ||
        Name.startswith("x86.avx2.psll.dq") ||
        Name.startswith("x86.avx2.psrl.dq") ||
        Name.startswith("x86.avx512.psll.dq") ||
        Name.startswith("x86.avx512.psrl.dq") ||
        Name == "x86.sse41.pblendw" ||
        Name.startswith("x86.sse41.blendp") ||
        Name.startswith("x86.avx.blend.p") ||
        Name == "x86.avx2.pblendw" ||
        Name.startswith("x86.avx2.pblendd.") ||
        Name == "x86.avx2.vbroadcasti128" ||
        Name == "x86.xop.vpcmov" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
    }
    // SSE4.1 ptest functions may have an old signature.
    if (Name.startswith("x86.sse41.ptest")) {
      if (Name == "x86.sse41.ptestc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name == "x86.sse41.ptestz")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name == "x86.sse41.ptestnzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (Name == "x86.sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (Name == "x86.sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (Name == "x86.sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (Name == "x86.sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (Name == "x86.avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (Name == "x86.avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped.
    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4.
    if (Name.startswith("x86.fma4.")) {
      F->setName("llvm.x86.fma" + Name.substr(8));
      NewFn = F;
      return true;
    }
    // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
    if (Name.startswith("x86.xop.vpermil2")) {
      auto Params = F->getFunctionType()->params();
      auto Idx = Params[2];
      if (Idx->getScalarType()->isFloatingPointTy()) {
        F->setName(Name + ".old");
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        Intrinsic::ID Permil2ID;
        if (EltSize == 64 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          Permil2ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
        NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
        return true;
      }
    }
    break;
  }
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
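
// Convert an integer mask argument to a vector of i1, extracting down to the
// number of mask elements actually used when fewer than 8.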
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                     cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                            Value *Op0, Value *Op1) {
  // If the mask is all ones just return the first operand.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                          Value *Op0, Value *Op1, Value *Shift,
                                          Value *Passthru, Value *Mask) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert(NumElts % 16 == 0);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l != NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= 16)
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  return EmitX86Select(Builder, Mask, Align, Passthru);
}

static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
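
// Upgrade a masked load intrinsic to a generic masked load, or to a plain
// load when the mask is known to be all ones.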
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Passthru->getType()));
  unsigned Align =
      Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}

static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                               ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
  return Builder.CreateSelect(Cmp, Op0, Op1);
}

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   ICmpInst::Predicate Pred) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();
  Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));

  Value *Mask = CI.getArgOperand(2);
  const auto *C = dyn_cast<Constant>(Mask);
  if (!C || !C->isAllOnesValue())
    Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts;
    Cmp = Builder.CreateShuffleVector(Cmp, UndefValue::get(Cmp->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
                                                     std::max(NumElts, 8U)));
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  assert(F && "Intrinsic call is not direct?");
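
  // When there is no replacement declaration, the upgrade is expressed
  // entirely as new IR emitted in place of the old call below.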
  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    Value *Rep;
    // Upgrade packed integer vector compare intrinsics to compare instructions.
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx512.mask.pcmpeq.")) {
      Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
    } else if (Name.startswith("llvm.x86.avx512.mask.pcmpgt.")) {
      Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (Name == "llvm.x86.sse41.pmaxsb" ||
               Name == "llvm.x86.sse2.pmaxs.w" ||
               Name == "llvm.x86.sse41.pmaxsd" ||
               Name.startswith("llvm.x86.avx2.pmaxs")) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (Name == "llvm.x86.sse2.pmaxu.b" ||
               Name == "llvm.x86.sse41.pmaxuw" ||
               Name == "llvm.x86.sse41.pmaxud" ||
               Name.startswith("llvm.x86.avx2.pmaxu")) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (Name == "llvm.x86.sse41.pminsb" ||
               Name == "llvm.x86.sse2.pmins.w" ||
               Name == "llvm.x86.sse41.pminsd" ||
               Name.startswith("llvm.x86.avx2.pmins")) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (Name == "llvm.x86.sse2.pminu.b" ||
               Name == "llvm.x86.sse41.pminuw" ||
               Name == "llvm.x86.sse41.pminud" ||
               Name.startswith("llvm.x86.avx2.pminu")) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (Name == "llvm.x86.sse2.cvtdq2pd" ||
               Name == "llvm.x86.sse2.cvtps2pd" ||
               Name == "llvm.x86.avx.cvtdq2.pd.256" ||
               Name == "llvm.x86.avx.cvt.ps2.pd.256") {
      // Lossless i32/float to double conversion.
      // Extract the bottom elements if necessary and convert to double vector.
      Value *Src = CI->getArgOperand(0);
      VectorType *SrcTy = cast<VectorType>(Src->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = CI->getArgOperand(0);

      unsigned NumDstElts = DstTy->getNumElements();
      if (NumDstElts < SrcTy->getNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
                                          ShuffleMask);
      }

      bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
      if (Int2Double)
        Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
      else
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    } else if (Name == "llvm.x86.sse2.cvttps2dq" ||
               Name.startswith("llvm.x86.avx.cvtt.")) {
      // Truncation (round to zero) float/double to i32 vector conversion.
      Value *Src = CI->getArgOperand(0);
      VectorType *DstTy = cast<VectorType>(CI->getType());
      Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
    } else if (Name.startswith("llvm.x86.sse4a.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Nontemporal (unaligned) store of the 0'th element of the float/double
      // vector.
      Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
      PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
      Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
      Value *Extract =
          Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

      StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx.movnt.")) {
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 32);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name == "llvm.x86.sse2.storel.dq") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
      Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
      Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Elt->getType()),
                                        "cast");
      Builder.CreateAlignedStore(Elt, BC, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.sse.storeu.") ||
               Name.startswith("llvm.x86.sse2.storeu.") ||
               Name.startswith("llvm.x86.avx.storeu.")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      Arg0 = Builder.CreateBitCast(Arg0,
                                   PointerType::getUnqual(Arg1->getType()),
                                   "cast");
      Builder.CreateAlignedStore(Arg1, Arg0, 1);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.b.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.w.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.d.") ||
               Name.startswith("llvm.x86.avx512.mask.storeu.q.")) {
      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/false);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.store.p") ||
               Name.startswith("llvm.x86.avx512.mask.store.b.") ||
               Name.startswith("llvm.x86.avx512.mask.store.w.") ||
               Name.startswith("llvm.x86.avx512.mask.store.d.") ||
               Name.startswith("llvm.x86.avx512.mask.store.q.")) {
      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), /*Aligned*/true);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.avx512.mask.loadu.p") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.b.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.w.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.d.") ||
               Name.startswith("llvm.x86.avx512.mask.loadu.q.")) {
      Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (Name.startswith("llvm.x86.avx512.mask.load.p") ||
               Name.startswith("llvm.x86.avx512.mask.load.b.") ||
               Name.startswith("llvm.x86.avx512.mask.load.w.") ||
               Name.startswith("llvm.x86.avx512.mask.load.d.") ||
               Name.startswith("llvm.x86.avx512.mask.load.q.")) {
      Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.xop.vpcmov") {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Sel = CI->getArgOperand(2);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
      Value *NotSel = Builder.CreateXor(Sel, MinusOne);
      Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
      Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name.startswith("llvm.x86.sse41.pmovsx") ||
               Name.startswith("llvm.x86.sse41.pmovzx") ||
               Name.startswith("llvm.x86.avx2.pmovsx") ||
               Name.startswith("llvm.x86.avx2.pmovzx")) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      uint32_t Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                        Idxs);
    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
               Name.startswith("llvm.x86.avx2.vbroadcast")) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));
    } else if (Name.startswith("llvm.x86.avx512.mask.palignr.")) {
      Rep = UpgradeX86PALIGNRIntrinsics(Builder, C, CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        CI->getArgOperand(2),
                                        CI->getArgOperand(3),
                                        CI->getArgOperand(4));
    } else if (Name == "llvm.x86.sse2.psll.dq" ||
               Name == "llvm.x86.avx2.psll.dq") {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq" ||
               Name == "llvm.x86.avx2.psrl.dq") {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs" ||
               Name == "llvm.x86.avx2.psll.dq.bs" ||
               Name == "llvm.x86.avx512.psll.dq.512") {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift);
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs" ||
               Name == "llvm.x86.avx2.psrl.dq.bs" ||
               Name == "llvm.x86.avx512.psrl.dq.512") {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift);
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name.startswith("llvm.x86.sse41.blendp") ||
               Name.startswith("llvm.x86.avx.blend.p") ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name.startswith("llvm.x86.avx2.pblendd.")) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (Name.startswith("llvm.x86.avx.vinsertf128.") ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = i;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>

      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i)
        Idxs[i] = Imm ? i : (i + NumElts);
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i)
        Idxs[i] = Imm ? (i + NumElts / 2) : i;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    } else if (Name.startswith("llvm.x86.avx.vextractf128.") ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
      SmallVector<uint32_t, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs[i] = Imm ? (i + NumElts) : i;
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
    } else if (Name == "llvm.stackprotectorcheck") {
      Rep = nullptr;
    } else if (Name.startswith("llvm.x86.avx.vpermil.") ||
               Name == "llvm.x86.sse2.pshuf.d" ||
               Name.startswith("llvm.x86.avx512.mask.pshuf.d.")) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (Name == "llvm.x86.sse2.pshufl.w" ||
               Name.startswith("llvm.x86.avx512.mask.pshufl.w.")) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (Name == "llvm.x86.sse2.pshufh.w" ||
               Name.startswith("llvm.x86.avx512.mask.pshufh.w.")) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts.
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::thread_pointer: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
    CI->eraseFromParent();
    return;
  }
  }
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
    return;

  if (MD->getNumOperands() == 3) {
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
                         MD->getOperand(2)};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
  } else {
    // Create a MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
  }
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number. If it is out-dated, drop the debug
/// info. Return true if the module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION)
    return false;

  bool RetCode = StripDebugInfo(M);
  if (RetCode) {
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return RetCode;
}

bool llvm::UpgradeModuleFlags(Module &M) {
  const NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() < 2)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
  }
  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly report an error when trying to link
  // an ObjC bitcode without this module flag with an ObjC bitcode with this
  // module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
                    (uint32_t)0);
    return true;
  }
  return false;
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
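
// Upgrade any "llvm.vectorizer.*" operands of a loop metadata attachment to
// the new "llvm.loop.*" names, returning the original node when nothing needs
// to change.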
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (!llvm::any_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}