1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 // Upgrade the declarations of the SSE4.1 functions whose arguments have 35 // changed their type from v4f32 to v2i64. 36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 37 Function *&NewFn) { 38 // Check whether this is an old version of the function, which received 39 // v4f32 arguments. 40 Type *Arg0Type = F->getFunctionType()->getParamType(0); 41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 42 return false; 43 44 // Yes, it's old, replace it with new version. 45 F->setName(F->getName() + ".old"); 46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 47 return true; 48 } 49 50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 51 // arguments have changed their type from i32 to i8. 52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 53 Function *&NewFn) { 54 // Check that the last argument is an i32. 55 Type *LastArgType = F->getFunctionType()->getParamType( 56 F->getFunctionType()->getNumParams() - 1); 57 if (!LastArgType->isIntegerTy(32)) 58 return false; 59 60 // Move this function aside and map down. 61 F->setName(F->getName() + ".old"); 62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 63 return true; 64 } 65 66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 67 assert(F && "Illegal to upgrade a non-existent Function."); 68 69 // Quickly eliminate it, if it's not a candidate. 70 StringRef Name = F->getName(); 71 if (Name.size() <= 8 || !Name.startswith("llvm.")) 72 return false; 73 Name = Name.substr(5); // Strip off "llvm." 74 75 switch (Name[0]) { 76 default: break; 77 case 'a': { 78 if (Name.startswith("arm.neon.vclz")) { 79 Type* args[2] = { 80 F->arg_begin()->getType(), 81 Type::getInt1Ty(F->getContext()) 82 }; 83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 84 // the end of the name. Change name from llvm.arm.neon.vclz.* to 85 // llvm.ctlz.* 86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 87 NewFn = Function::Create(fType, F->getLinkage(), 88 "llvm.ctlz." + Name.substr(14), F->getParent()); 89 return true; 90 } 91 if (Name.startswith("arm.neon.vcnt")) { 92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 93 F->arg_begin()->getType()); 94 return true; 95 } 96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 97 if (vldRegex.match(Name)) { 98 auto fArgs = F->getFunctionType()->params(); 99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 100 // Can't use Intrinsic::getDeclaration here as the return types might 101 // then only be structurally equal. 102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 103 NewFn = Function::Create(fType, F->getLinkage(), 104 "llvm." + Name + ".p0i8", F->getParent()); 105 return true; 106 } 107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 108 if (vstRegex.match(Name)) { 109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 110 Intrinsic::arm_neon_vst2, 111 Intrinsic::arm_neon_vst3, 112 Intrinsic::arm_neon_vst4}; 113 114 static const Intrinsic::ID StoreLaneInts[] = { 115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 116 Intrinsic::arm_neon_vst4lane 117 }; 118 119 auto fArgs = F->getFunctionType()->params(); 120 Type *Tys[] = {fArgs[0], fArgs[1]}; 121 if (Name.find("lane") == StringRef::npos) 122 NewFn = Intrinsic::getDeclaration(F->getParent(), 123 StoreInts[fArgs.size() - 3], Tys); 124 else 125 NewFn = Intrinsic::getDeclaration(F->getParent(), 126 StoreLaneInts[fArgs.size() - 5], Tys); 127 return true; 128 } 129 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 131 return true; 132 } 133 break; 134 } 135 136 case 'c': { 137 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 138 F->setName(Name + ".old"); 139 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 140 F->arg_begin()->getType()); 141 return true; 142 } 143 if (Name.startswith("cttz.") && F->arg_size() == 1) { 144 F->setName(Name + ".old"); 145 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 146 F->arg_begin()->getType()); 147 return true; 148 } 149 break; 150 } 151 152 case 'm': { 153 if (Name.startswith("masked.load.")) { 154 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; 155 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) { 156 F->setName(Name + ".old"); 157 NewFn = Intrinsic::getDeclaration(F->getParent(), 158 Intrinsic::masked_load, 159 Tys); 160 return true; 161 } 162 } 163 if (Name.startswith("masked.store.")) { 164 auto Args = F->getFunctionType()->params(); 165 Type *Tys[] = { Args[0], Args[1] }; 166 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) { 167 F->setName(Name + ".old"); 168 NewFn = Intrinsic::getDeclaration(F->getParent(), 169 Intrinsic::masked_store, 170 Tys); 171 return true; 172 } 173 } 174 break; 175 } 176 177 case 'o': 178 // We only need to change the name to match the mangling including the 179 // address space. 180 if (F->arg_size() == 2 && Name.startswith("objectsize.")) { 181 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 182 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 183 F->setName(Name + ".old"); 184 NewFn = Intrinsic::getDeclaration(F->getParent(), 185 Intrinsic::objectsize, Tys); 186 return true; 187 } 188 } 189 break; 190 191 case 's': 192 if (Name == "stackprotectorcheck") { 193 NewFn = nullptr; 194 return true; 195 } 196 197 case 'x': { 198 if (Name.startswith("x86.sse2.pcmpeq.") || 199 Name.startswith("x86.sse2.pcmpgt.") || 200 Name.startswith("x86.avx2.pcmpeq.") || 201 Name.startswith("x86.avx2.pcmpgt.") || 202 Name.startswith("x86.avx512.mask.pcmpeq.") || 203 Name.startswith("x86.avx512.mask.pcmpgt.") || 204 Name == "x86.sse41.pmaxsb" || 205 Name == "x86.sse2.pmaxs.w" || 206 Name == "x86.sse41.pmaxsd" || 207 Name == "x86.sse2.pmaxu.b" || 208 Name == "x86.sse41.pmaxuw" || 209 Name == "x86.sse41.pmaxud" || 210 Name == "x86.sse41.pminsb" || 211 Name == "x86.sse2.pmins.w" || 212 Name == "x86.sse41.pminsd" || 213 Name == "x86.sse2.pminu.b" || 214 Name == "x86.sse41.pminuw" || 215 Name == "x86.sse41.pminud" || 216 Name.startswith("x86.avx2.pmax") || 217 Name.startswith("x86.avx2.pmin") || 218 Name.startswith("x86.avx2.vbroadcast") || 219 Name.startswith("x86.avx2.pbroadcast") || 220 Name.startswith("x86.avx.vpermil.") || 221 Name.startswith("x86.sse2.pshuf") || 222 Name.startswith("x86.avx512.mask.pshuf.d.") || 223 Name.startswith("x86.avx512.mask.pshufl.w.") || 224 Name.startswith("x86.avx512.mask.pshufh.w.") || 225 Name.startswith("x86.avx512.mask.punpckl") || 226 Name.startswith("x86.avx512.mask.punpckh") || 227 Name.startswith("x86.avx512.mask.unpckl.") || 228 Name.startswith("x86.avx512.mask.unpckh.") || 229 Name.startswith("x86.sse41.pmovsx") || 230 Name.startswith("x86.sse41.pmovzx") || 231 Name.startswith("x86.avx2.pmovsx") || 232 Name.startswith("x86.avx2.pmovzx") || 233 Name == "x86.sse2.cvtdq2pd" || 234 Name == "x86.sse2.cvtps2pd" || 235 Name == "x86.avx.cvtdq2.pd.256" || 236 Name == "x86.avx.cvt.ps2.pd.256" || 237 Name == "x86.sse2.cvttps2dq" || 238 Name.startswith("x86.avx.cvtt.") || 239 Name.startswith("x86.avx.vinsertf128.") || 240 Name == "x86.avx2.vinserti128" || 241 Name.startswith("x86.avx.vextractf128.") || 242 Name == "x86.avx2.vextracti128" || 243 Name.startswith("x86.sse4a.movnt.") || 244 Name.startswith("x86.avx.movnt.") || 245 Name == "x86.sse2.storel.dq" || 246 Name.startswith("x86.sse.storeu.") || 247 Name.startswith("x86.sse2.storeu.") || 248 Name.startswith("x86.avx.storeu.") || 249 Name.startswith("x86.avx512.mask.storeu.p") || 250 Name.startswith("x86.avx512.mask.storeu.b.") || 251 Name.startswith("x86.avx512.mask.storeu.w.") || 252 Name.startswith("x86.avx512.mask.storeu.d.") || 253 Name.startswith("x86.avx512.mask.storeu.q.") || 254 Name.startswith("x86.avx512.mask.store.p") || 255 Name.startswith("x86.avx512.mask.store.b.") || 256 Name.startswith("x86.avx512.mask.store.w.") || 257 Name.startswith("x86.avx512.mask.store.d.") || 258 Name.startswith("x86.avx512.mask.store.q.") || 259 Name.startswith("x86.avx512.mask.loadu.p") || 260 Name.startswith("x86.avx512.mask.loadu.b.") || 261 Name.startswith("x86.avx512.mask.loadu.w.") || 262 Name.startswith("x86.avx512.mask.loadu.d.") || 263 Name.startswith("x86.avx512.mask.loadu.q.") || 264 Name.startswith("x86.avx512.mask.load.p") || 265 Name.startswith("x86.avx512.mask.load.b.") || 266 Name.startswith("x86.avx512.mask.load.w.") || 267 Name.startswith("x86.avx512.mask.load.d.") || 268 Name.startswith("x86.avx512.mask.load.q.") || 269 Name == "x86.sse42.crc32.64.8" || 270 Name.startswith("x86.avx.vbroadcast.s") || 271 Name.startswith("x86.avx512.mask.palignr.") || 272 Name.startswith("x86.sse2.psll.dq") || 273 Name.startswith("x86.sse2.psrl.dq") || 274 Name.startswith("x86.avx2.psll.dq") || 275 Name.startswith("x86.avx2.psrl.dq") || 276 Name.startswith("x86.avx512.psll.dq") || 277 Name.startswith("x86.avx512.psrl.dq") || 278 Name == "x86.sse41.pblendw" || 279 Name.startswith("x86.sse41.blendp") || 280 Name.startswith("x86.avx.blend.p") || 281 Name == "x86.avx2.pblendw" || 282 Name.startswith("x86.avx2.pblendd.") || 283 Name == "x86.avx2.vbroadcasti128" || 284 Name == "x86.xop.vpcmov" || 285 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { 286 NewFn = nullptr; 287 return true; 288 } 289 // SSE4.1 ptest functions may have an old signature. 290 if (Name.startswith("x86.sse41.ptest")) { 291 if (Name == "x86.sse41.ptestc") 292 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); 293 if (Name == "x86.sse41.ptestz") 294 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); 295 if (Name == "x86.sse41.ptestnzc") 296 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 297 } 298 // Several blend and other instructions with masks used the wrong number of 299 // bits. 300 if (Name == "x86.sse41.insertps") 301 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 302 NewFn); 303 if (Name == "x86.sse41.dppd") 304 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 305 NewFn); 306 if (Name == "x86.sse41.dpps") 307 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 308 NewFn); 309 if (Name == "x86.sse41.mpsadbw") 310 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 311 NewFn); 312 if (Name == "x86.avx.dp.ps.256") 313 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 314 NewFn); 315 if (Name == "x86.avx2.mpsadbw") 316 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 317 NewFn); 318 319 // frcz.ss/sd may need to have an argument dropped 320 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { 321 F->setName(Name + ".old"); 322 NewFn = Intrinsic::getDeclaration(F->getParent(), 323 Intrinsic::x86_xop_vfrcz_ss); 324 return true; 325 } 326 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { 327 F->setName(Name + ".old"); 328 NewFn = Intrinsic::getDeclaration(F->getParent(), 329 Intrinsic::x86_xop_vfrcz_sd); 330 return true; 331 } 332 // Fix the FMA4 intrinsics to remove the 4 333 if (Name.startswith("x86.fma4.")) { 334 F->setName("llvm.x86.fma" + Name.substr(8)); 335 NewFn = F; 336 return true; 337 } 338 // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 339 if (Name.startswith("x86.xop.vpermil2")) { 340 auto Params = F->getFunctionType()->params(); 341 auto Idx = Params[2]; 342 if (Idx->getScalarType()->isFloatingPointTy()) { 343 F->setName(Name + ".old"); 344 unsigned IdxSize = Idx->getPrimitiveSizeInBits(); 345 unsigned EltSize = Idx->getScalarSizeInBits(); 346 Intrinsic::ID Permil2ID; 347 if (EltSize == 64 && IdxSize == 128) 348 Permil2ID = Intrinsic::x86_xop_vpermil2pd; 349 else if (EltSize == 32 && IdxSize == 128) 350 Permil2ID = Intrinsic::x86_xop_vpermil2ps; 351 else if (EltSize == 64 && IdxSize == 256) 352 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256; 353 else 354 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256; 355 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); 356 return true; 357 } 358 } 359 break; 360 } 361 } 362 363 // This may not belong here. This function is effectively being overloaded 364 // to both detect an intrinsic which needs upgrading, and to provide the 365 // upgraded form of the intrinsic. We should perhaps have two separate 366 // functions for this. 367 return false; 368 } 369 370 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 371 NewFn = nullptr; 372 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 373 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 374 375 // Upgrade intrinsic attributes. This does not change the function. 376 if (NewFn) 377 F = NewFn; 378 if (Intrinsic::ID id = F->getIntrinsicID()) 379 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 380 return Upgraded; 381 } 382 383 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 384 // Nothing to do yet. 385 return false; 386 } 387 388 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them 389 // to byte shuffles. 390 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 391 Value *Op, unsigned Shift) { 392 Type *ResultTy = Op->getType(); 393 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 394 395 // Bitcast from a 64-bit element type to a byte element type. 396 Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts); 397 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 398 399 // We'll be shuffling in zeroes. 400 Value *Res = Constant::getNullValue(VecTy); 401 402 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 403 // we'll just return the zero vector. 404 if (Shift < 16) { 405 uint32_t Idxs[64]; 406 // 256/512-bit version is split into 2/4 16-byte lanes. 407 for (unsigned l = 0; l != NumElts; l += 16) 408 for (unsigned i = 0; i != 16; ++i) { 409 unsigned Idx = NumElts + i - Shift; 410 if (Idx < NumElts) 411 Idx -= NumElts - 16; // end of lane, switch operand. 412 Idxs[l + i] = Idx + l; 413 } 414 415 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); 416 } 417 418 // Bitcast back to a 64-bit element type. 419 return Builder.CreateBitCast(Res, ResultTy, "cast"); 420 } 421 422 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them 423 // to byte shuffles. 424 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 425 Value *Op, 426 unsigned Shift) { 427 Type *ResultTy = Op->getType(); 428 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 429 430 // Bitcast from a 64-bit element type to a byte element type. 431 Type *VecTy = VectorType::get(Type::getInt8Ty(C), NumElts); 432 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 433 434 // We'll be shuffling in zeroes. 435 Value *Res = Constant::getNullValue(VecTy); 436 437 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 438 // we'll just return the zero vector. 439 if (Shift < 16) { 440 uint32_t Idxs[64]; 441 // 256/512-bit version is split into 2/4 16-byte lanes. 442 for (unsigned l = 0; l != NumElts; l += 16) 443 for (unsigned i = 0; i != 16; ++i) { 444 unsigned Idx = i + Shift; 445 if (Idx >= 16) 446 Idx += NumElts - 16; // end of lane, switch operand. 447 Idxs[l + i] = Idx + l; 448 } 449 450 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); 451 } 452 453 // Bitcast back to a 64-bit element type. 454 return Builder.CreateBitCast(Res, ResultTy, "cast"); 455 } 456 457 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, 458 unsigned NumElts) { 459 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), 460 cast<IntegerType>(Mask->getType())->getBitWidth()); 461 Mask = Builder.CreateBitCast(Mask, MaskTy); 462 463 // If we have less than 8 elements, then the starting mask was an i8 and 464 // we need to extract down to the right number of elements. 465 if (NumElts < 8) { 466 uint32_t Indices[4]; 467 for (unsigned i = 0; i != NumElts; ++i) 468 Indices[i] = i; 469 Mask = Builder.CreateShuffleVector(Mask, Mask, 470 makeArrayRef(Indices, NumElts), 471 "extract"); 472 } 473 474 return Mask; 475 } 476 477 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 478 Value *Op0, Value *Op1) { 479 // If the mask is all ones just emit the align operation. 480 if (const auto *C = dyn_cast<Constant>(Mask)) 481 if (C->isAllOnesValue()) 482 return Op0; 483 484 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 485 return Builder.CreateSelect(Mask, Op0, Op1); 486 } 487 488 static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 489 Value *Op0, Value *Op1, Value *Shift, 490 Value *Passthru, Value *Mask) { 491 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); 492 493 unsigned NumElts = Op0->getType()->getVectorNumElements(); 494 assert(NumElts % 16 == 0); 495 496 // If palignr is shifting the pair of vectors more than the size of two 497 // lanes, emit zero. 498 if (ShiftVal >= 32) 499 return llvm::Constant::getNullValue(Op0->getType()); 500 501 // If palignr is shifting the pair of input vectors more than one lane, 502 // but less than two lanes, convert to shifting in zeroes. 503 if (ShiftVal > 16) { 504 ShiftVal -= 16; 505 Op1 = Op0; 506 Op0 = llvm::Constant::getNullValue(Op0->getType()); 507 } 508 509 uint32_t Indices[64]; 510 // 256-bit palignr operates on 128-bit lanes so we need to handle that 511 for (unsigned l = 0; l != NumElts; l += 16) { 512 for (unsigned i = 0; i != 16; ++i) { 513 unsigned Idx = ShiftVal + i; 514 if (Idx >= 16) 515 Idx += NumElts - 16; // End of lane, switch operand. 516 Indices[l + i] = Idx + l; 517 } 518 } 519 520 Value *Align = Builder.CreateShuffleVector(Op1, Op0, 521 makeArrayRef(Indices, NumElts), 522 "palignr"); 523 524 return EmitX86Select(Builder, Mask, Align, Passthru); 525 } 526 527 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C, 528 Value *Ptr, Value *Data, Value *Mask, 529 bool Aligned) { 530 // Cast the pointer to the right type. 531 Ptr = Builder.CreateBitCast(Ptr, 532 llvm::PointerType::getUnqual(Data->getType())); 533 unsigned Align = 534 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; 535 536 // If the mask is all ones just emit a regular store. 537 if (const auto *C = dyn_cast<Constant>(Mask)) 538 if (C->isAllOnesValue()) 539 return Builder.CreateAlignedStore(Data, Ptr, Align); 540 541 // Convert the mask from an integer type to a vector of i1. 542 unsigned NumElts = Data->getType()->getVectorNumElements(); 543 Mask = getX86MaskVec(Builder, Mask, NumElts); 544 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); 545 } 546 547 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C, 548 Value *Ptr, Value *Passthru, Value *Mask, 549 bool Aligned) { 550 // Cast the pointer to the right type. 551 Ptr = Builder.CreateBitCast(Ptr, 552 llvm::PointerType::getUnqual(Passthru->getType())); 553 unsigned Align = 554 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; 555 556 // If the mask is all ones just emit a regular store. 557 if (const auto *C = dyn_cast<Constant>(Mask)) 558 if (C->isAllOnesValue()) 559 return Builder.CreateAlignedLoad(Ptr, Align); 560 561 // Convert the mask from an integer type to a vector of i1. 562 unsigned NumElts = Passthru->getType()->getVectorNumElements(); 563 Mask = getX86MaskVec(Builder, Mask, NumElts); 564 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); 565 } 566 567 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, 568 ICmpInst::Predicate Pred) { 569 Value *Op0 = CI.getArgOperand(0); 570 Value *Op1 = CI.getArgOperand(1); 571 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); 572 return Builder.CreateSelect(Cmp, Op0, Op1); 573 } 574 575 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, 576 ICmpInst::Predicate Pred) { 577 Value *Op0 = CI.getArgOperand(0); 578 unsigned NumElts = Op0->getType()->getVectorNumElements(); 579 Value *Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); 580 581 Value *Mask = CI.getArgOperand(2); 582 const auto *C = dyn_cast<Constant>(Mask); 583 if (!C || !C->isAllOnesValue()) 584 Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts)); 585 586 if (NumElts < 8) { 587 uint32_t Indices[8]; 588 for (unsigned i = 0; i != NumElts; ++i) 589 Indices[i] = i; 590 for (unsigned i = NumElts; i != 8; ++i) 591 Indices[i] = NumElts; 592 Cmp = Builder.CreateShuffleVector(Cmp, UndefValue::get(Cmp->getType()), 593 Indices); 594 } 595 return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(), 596 std::max(NumElts, 8U))); 597 } 598 599 /// Upgrade a call to an old intrinsic. All argument and return casting must be 600 /// provided to seamlessly integrate with existing context. 601 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 602 Function *F = CI->getCalledFunction(); 603 LLVMContext &C = CI->getContext(); 604 IRBuilder<> Builder(C); 605 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 606 607 assert(F && "Intrinsic call is not direct?"); 608 609 if (!NewFn) { 610 // Get the Function's name. 611 StringRef Name = F->getName(); 612 613 Value *Rep; 614 // Upgrade packed integer vector compare intrinsics to compare instructions. 615 if (Name.startswith("llvm.x86.sse2.pcmpeq.") || 616 Name.startswith("llvm.x86.avx2.pcmpeq.")) { 617 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1), 618 "pcmpeq"); 619 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 620 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") || 621 Name.startswith("llvm.x86.avx2.pcmpgt.")) { 622 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), 623 "pcmpgt"); 624 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 625 } else if (Name.startswith("llvm.x86.avx512.mask.pcmpeq.")) { 626 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ); 627 } else if (Name.startswith("llvm.x86.avx512.mask.pcmpgt.")) { 628 Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT); 629 } else if (Name == "llvm.x86.sse41.pmaxsb" || 630 Name == "llvm.x86.sse2.pmaxs.w" || 631 Name == "llvm.x86.sse41.pmaxsd" || 632 Name.startswith("llvm.x86.avx2.pmaxs")) { 633 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); 634 } else if (Name == "llvm.x86.sse2.pmaxu.b" || 635 Name == "llvm.x86.sse41.pmaxuw" || 636 Name == "llvm.x86.sse41.pmaxud" || 637 Name.startswith("llvm.x86.avx2.pmaxu")) { 638 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT); 639 } else if (Name == "llvm.x86.sse41.pminsb" || 640 Name == "llvm.x86.sse2.pmins.w" || 641 Name == "llvm.x86.sse41.pminsd" || 642 Name.startswith("llvm.x86.avx2.pmins")) { 643 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); 644 } else if (Name == "llvm.x86.sse2.pminu.b" || 645 Name == "llvm.x86.sse41.pminuw" || 646 Name == "llvm.x86.sse41.pminud" || 647 Name.startswith("llvm.x86.avx2.pminu")) { 648 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); 649 } else if (Name == "llvm.x86.sse2.cvtdq2pd" || 650 Name == "llvm.x86.sse2.cvtps2pd" || 651 Name == "llvm.x86.avx.cvtdq2.pd.256" || 652 Name == "llvm.x86.avx.cvt.ps2.pd.256") { 653 // Lossless i32/float to double conversion. 654 // Extract the bottom elements if necessary and convert to double vector. 655 Value *Src = CI->getArgOperand(0); 656 VectorType *SrcTy = cast<VectorType>(Src->getType()); 657 VectorType *DstTy = cast<VectorType>(CI->getType()); 658 Rep = CI->getArgOperand(0); 659 660 unsigned NumDstElts = DstTy->getNumElements(); 661 if (NumDstElts < SrcTy->getNumElements()) { 662 assert(NumDstElts == 2 && "Unexpected vector size"); 663 uint32_t ShuffleMask[2] = { 0, 1 }; 664 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), 665 ShuffleMask); 666 } 667 668 bool Int2Double = (StringRef::npos != Name.find("cvtdq2")); 669 if (Int2Double) 670 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); 671 else 672 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 673 } else if (Name == "llvm.x86.sse2.cvttps2dq" || 674 Name.startswith("llvm.x86.avx.cvtt.")) { 675 // Truncation (round to zero) float/double to i32 vector conversion. 676 Value *Src = CI->getArgOperand(0); 677 VectorType *DstTy = cast<VectorType>(CI->getType()); 678 Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt"); 679 } else if (Name.startswith("llvm.x86.sse4a.movnt.")) { 680 Module *M = F->getParent(); 681 SmallVector<Metadata *, 1> Elts; 682 Elts.push_back( 683 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 684 MDNode *Node = MDNode::get(C, Elts); 685 686 Value *Arg0 = CI->getArgOperand(0); 687 Value *Arg1 = CI->getArgOperand(1); 688 689 // Nontemporal (unaligned) store of the 0'th element of the float/double 690 // vector. 691 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType(); 692 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy); 693 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast"); 694 Value *Extract = 695 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement"); 696 697 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1); 698 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 699 700 // Remove intrinsic. 701 CI->eraseFromParent(); 702 return; 703 } else if (Name.startswith("llvm.x86.avx.movnt.")) { 704 Module *M = F->getParent(); 705 SmallVector<Metadata *, 1> Elts; 706 Elts.push_back( 707 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 708 MDNode *Node = MDNode::get(C, Elts); 709 710 Value *Arg0 = CI->getArgOperand(0); 711 Value *Arg1 = CI->getArgOperand(1); 712 713 // Convert the type of the pointer to a pointer to the stored type. 714 Value *BC = Builder.CreateBitCast(Arg0, 715 PointerType::getUnqual(Arg1->getType()), 716 "cast"); 717 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 32); 718 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 719 720 // Remove intrinsic. 721 CI->eraseFromParent(); 722 return; 723 } else if (Name == "llvm.x86.sse2.storel.dq") { 724 Value *Arg0 = CI->getArgOperand(0); 725 Value *Arg1 = CI->getArgOperand(1); 726 727 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 728 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 729 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); 730 Value *BC = Builder.CreateBitCast(Arg0, 731 PointerType::getUnqual(Elt->getType()), 732 "cast"); 733 Builder.CreateAlignedStore(Elt, BC, 1); 734 735 // Remove intrinsic. 736 CI->eraseFromParent(); 737 return; 738 } else if (Name.startswith("llvm.x86.sse.storeu.") || 739 Name.startswith("llvm.x86.sse2.storeu.") || 740 Name.startswith("llvm.x86.avx.storeu.")) { 741 Value *Arg0 = CI->getArgOperand(0); 742 Value *Arg1 = CI->getArgOperand(1); 743 744 Arg0 = Builder.CreateBitCast(Arg0, 745 PointerType::getUnqual(Arg1->getType()), 746 "cast"); 747 Builder.CreateAlignedStore(Arg1, Arg0, 1); 748 749 // Remove intrinsic. 750 CI->eraseFromParent(); 751 return; 752 } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") || 753 Name.startswith("llvm.x86.avx512.mask.storeu.b.") || 754 Name.startswith("llvm.x86.avx512.mask.storeu.w.") || 755 Name.startswith("llvm.x86.avx512.mask.storeu.d.") || 756 Name.startswith("llvm.x86.avx512.mask.storeu.q.")) { 757 UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), 758 CI->getArgOperand(2), /*Aligned*/false); 759 760 // Remove intrinsic. 761 CI->eraseFromParent(); 762 return; 763 } else if (Name.startswith("llvm.x86.avx512.mask.store.p") || 764 Name.startswith("llvm.x86.avx512.mask.store.b.") || 765 Name.startswith("llvm.x86.avx512.mask.store.w.") || 766 Name.startswith("llvm.x86.avx512.mask.store.d.") || 767 Name.startswith("llvm.x86.avx512.mask.store.q.")) { 768 UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1), 769 CI->getArgOperand(2), /*Aligned*/true); 770 771 // Remove intrinsic. 772 CI->eraseFromParent(); 773 return; 774 } else if (Name.startswith("llvm.x86.avx512.mask.loadu.p") || 775 Name.startswith("llvm.x86.avx512.mask.loadu.b.") || 776 Name.startswith("llvm.x86.avx512.mask.loadu.w.") || 777 Name.startswith("llvm.x86.avx512.mask.loadu.d.") || 778 Name.startswith("llvm.x86.avx512.mask.loadu.q.")) { 779 Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0), 780 CI->getArgOperand(1), CI->getArgOperand(2), 781 /*Aligned*/false); 782 } else if (Name.startswith("llvm.x86.avx512.mask.load.p") || 783 Name.startswith("llvm.x86.avx512.mask.load.b.") || 784 Name.startswith("llvm.x86.avx512.mask.load.w.") || 785 Name.startswith("llvm.x86.avx512.mask.load.d.") || 786 Name.startswith("llvm.x86.avx512.mask.load.q.")) { 787 Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0), 788 CI->getArgOperand(1),CI->getArgOperand(2), 789 /*Aligned*/true); 790 } else if (Name.startswith("llvm.x86.xop.vpcom")) { 791 Intrinsic::ID intID; 792 if (Name.endswith("ub")) 793 intID = Intrinsic::x86_xop_vpcomub; 794 else if (Name.endswith("uw")) 795 intID = Intrinsic::x86_xop_vpcomuw; 796 else if (Name.endswith("ud")) 797 intID = Intrinsic::x86_xop_vpcomud; 798 else if (Name.endswith("uq")) 799 intID = Intrinsic::x86_xop_vpcomuq; 800 else if (Name.endswith("b")) 801 intID = Intrinsic::x86_xop_vpcomb; 802 else if (Name.endswith("w")) 803 intID = Intrinsic::x86_xop_vpcomw; 804 else if (Name.endswith("d")) 805 intID = Intrinsic::x86_xop_vpcomd; 806 else if (Name.endswith("q")) 807 intID = Intrinsic::x86_xop_vpcomq; 808 else 809 llvm_unreachable("Unknown suffix"); 810 811 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" 812 unsigned Imm; 813 if (Name.startswith("lt")) 814 Imm = 0; 815 else if (Name.startswith("le")) 816 Imm = 1; 817 else if (Name.startswith("gt")) 818 Imm = 2; 819 else if (Name.startswith("ge")) 820 Imm = 3; 821 else if (Name.startswith("eq")) 822 Imm = 4; 823 else if (Name.startswith("ne")) 824 Imm = 5; 825 else if (Name.startswith("false")) 826 Imm = 6; 827 else if (Name.startswith("true")) 828 Imm = 7; 829 else 830 llvm_unreachable("Unknown condition"); 831 832 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 833 Rep = 834 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 835 Builder.getInt8(Imm)}); 836 } else if (Name == "llvm.x86.xop.vpcmov") { 837 Value *Arg0 = CI->getArgOperand(0); 838 Value *Arg1 = CI->getArgOperand(1); 839 Value *Sel = CI->getArgOperand(2); 840 unsigned NumElts = CI->getType()->getVectorNumElements(); 841 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1)); 842 Value *NotSel = Builder.CreateXor(Sel, MinusOne); 843 Value *Sel0 = Builder.CreateAnd(Arg0, Sel); 844 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel); 845 Rep = Builder.CreateOr(Sel0, Sel1); 846 } else if (Name == "llvm.x86.sse42.crc32.64.8") { 847 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 848 Intrinsic::x86_sse42_crc32_32_8); 849 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 850 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 851 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 852 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { 853 // Replace broadcasts with a series of insertelements. 854 Type *VecTy = CI->getType(); 855 Type *EltTy = VecTy->getVectorElementType(); 856 unsigned EltNum = VecTy->getVectorNumElements(); 857 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 858 EltTy->getPointerTo()); 859 Value *Load = Builder.CreateLoad(EltTy, Cast); 860 Type *I32Ty = Type::getInt32Ty(C); 861 Rep = UndefValue::get(VecTy); 862 for (unsigned I = 0; I < EltNum; ++I) 863 Rep = Builder.CreateInsertElement(Rep, Load, 864 ConstantInt::get(I32Ty, I)); 865 } else if (Name.startswith("llvm.x86.sse41.pmovsx") || 866 Name.startswith("llvm.x86.sse41.pmovzx") || 867 Name.startswith("llvm.x86.avx2.pmovsx") || 868 Name.startswith("llvm.x86.avx2.pmovzx")) { 869 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 870 VectorType *DstTy = cast<VectorType>(CI->getType()); 871 unsigned NumDstElts = DstTy->getNumElements(); 872 873 // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 874 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 875 for (unsigned i = 0; i != NumDstElts; ++i) 876 ShuffleMask[i] = i; 877 878 Value *SV = Builder.CreateShuffleVector( 879 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 880 881 bool DoSext = (StringRef::npos != Name.find("pmovsx")); 882 Rep = DoSext ? Builder.CreateSExt(SV, DstTy) 883 : Builder.CreateZExt(SV, DstTy); 884 } else if (Name == "llvm.x86.avx2.vbroadcasti128") { 885 // Replace vbroadcasts with a vector shuffle. 886 Type *VT = VectorType::get(Type::getInt64Ty(C), 2); 887 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 888 PointerType::getUnqual(VT)); 889 Value *Load = Builder.CreateLoad(VT, Op); 890 uint32_t Idxs[4] = { 0, 1, 0, 1 }; 891 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 892 Idxs); 893 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") || 894 Name.startswith("llvm.x86.avx2.vbroadcast")) { 895 // Replace vp?broadcasts with a vector shuffle. 896 Value *Op = CI->getArgOperand(0); 897 unsigned NumElts = CI->getType()->getVectorNumElements(); 898 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 899 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 900 Constant::getNullValue(MaskTy)); 901 } else if (Name.startswith("llvm.x86.avx512.mask.palignr.")) { 902 Rep = UpgradeX86PALIGNRIntrinsics(Builder, C, CI->getArgOperand(0), 903 CI->getArgOperand(1), 904 CI->getArgOperand(2), 905 CI->getArgOperand(3), 906 CI->getArgOperand(4)); 907 } else if (Name == "llvm.x86.sse2.psll.dq" || 908 Name == "llvm.x86.avx2.psll.dq") { 909 // 128/256-bit shift left specified in bits. 910 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 911 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 912 Shift / 8); // Shift is in bits. 913 } else if (Name == "llvm.x86.sse2.psrl.dq" || 914 Name == "llvm.x86.avx2.psrl.dq") { 915 // 128/256-bit shift right specified in bits. 916 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 917 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 918 Shift / 8); // Shift is in bits. 919 } else if (Name == "llvm.x86.sse2.psll.dq.bs" || 920 Name == "llvm.x86.avx2.psll.dq.bs" || 921 Name == "llvm.x86.avx512.psll.dq.512") { 922 // 128/256/512-bit shift left specified in bytes. 923 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 924 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift); 925 } else if (Name == "llvm.x86.sse2.psrl.dq.bs" || 926 Name == "llvm.x86.avx2.psrl.dq.bs" || 927 Name == "llvm.x86.avx512.psrl.dq.512") { 928 // 128/256/512-bit shift right specified in bytes. 929 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 930 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), Shift); 931 } else if (Name == "llvm.x86.sse41.pblendw" || 932 Name.startswith("llvm.x86.sse41.blendp") || 933 Name.startswith("llvm.x86.avx.blend.p") || 934 Name == "llvm.x86.avx2.pblendw" || 935 Name.startswith("llvm.x86.avx2.pblendd.")) { 936 Value *Op0 = CI->getArgOperand(0); 937 Value *Op1 = CI->getArgOperand(1); 938 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 939 VectorType *VecTy = cast<VectorType>(CI->getType()); 940 unsigned NumElts = VecTy->getNumElements(); 941 942 SmallVector<uint32_t, 16> Idxs(NumElts); 943 for (unsigned i = 0; i != NumElts; ++i) 944 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 945 946 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 947 } else if (Name.startswith("llvm.x86.avx.vinsertf128.") || 948 Name == "llvm.x86.avx2.vinserti128") { 949 Value *Op0 = CI->getArgOperand(0); 950 Value *Op1 = CI->getArgOperand(1); 951 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 952 VectorType *VecTy = cast<VectorType>(CI->getType()); 953 unsigned NumElts = VecTy->getNumElements(); 954 955 // Mask off the high bits of the immediate value; hardware ignores those. 956 Imm = Imm & 1; 957 958 // Extend the second operand into a vector that is twice as big. 959 Value *UndefV = UndefValue::get(Op1->getType()); 960 SmallVector<uint32_t, 8> Idxs(NumElts); 961 for (unsigned i = 0; i != NumElts; ++i) 962 Idxs[i] = i; 963 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); 964 965 // Insert the second operand into the first operand. 966 967 // Note that there is no guarantee that instruction lowering will actually 968 // produce a vinsertf128 instruction for the created shuffles. In 969 // particular, the 0 immediate case involves no lane changes, so it can 970 // be handled as a blend. 971 972 // Example of shuffle mask for 32-bit elements: 973 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 974 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 975 976 // The low half of the result is either the low half of the 1st operand 977 // or the low half of the 2nd operand (the inserted vector). 978 for (unsigned i = 0; i != NumElts / 2; ++i) 979 Idxs[i] = Imm ? i : (i + NumElts); 980 // The high half of the result is either the low half of the 2nd operand 981 // (the inserted vector) or the high half of the 1st operand. 982 for (unsigned i = NumElts / 2; i != NumElts; ++i) 983 Idxs[i] = Imm ? (i + NumElts / 2) : i; 984 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); 985 } else if (Name.startswith("llvm.x86.avx.vextractf128.") || 986 Name == "llvm.x86.avx2.vextracti128") { 987 Value *Op0 = CI->getArgOperand(0); 988 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 989 VectorType *VecTy = cast<VectorType>(CI->getType()); 990 unsigned NumElts = VecTy->getNumElements(); 991 992 // Mask off the high bits of the immediate value; hardware ignores those. 993 Imm = Imm & 1; 994 995 // Get indexes for either the high half or low half of the input vector. 996 SmallVector<uint32_t, 4> Idxs(NumElts); 997 for (unsigned i = 0; i != NumElts; ++i) { 998 Idxs[i] = Imm ? (i + NumElts) : i; 999 } 1000 1001 Value *UndefV = UndefValue::get(Op0->getType()); 1002 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); 1003 } else if (Name == "llvm.stackprotectorcheck") { 1004 Rep = nullptr; 1005 } else if (Name.startswith("llvm.x86.avx.vpermil.") || 1006 Name == "llvm.x86.sse2.pshuf.d" || 1007 Name.startswith("llvm.x86.avx512.mask.pshuf.d.")) { 1008 Value *Op0 = CI->getArgOperand(0); 1009 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1010 VectorType *VecTy = cast<VectorType>(CI->getType()); 1011 unsigned NumElts = VecTy->getNumElements(); 1012 // Calcuate the size of each index in the immediate. 1013 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 1014 unsigned IdxMask = ((1 << IdxSize) - 1); 1015 1016 SmallVector<uint32_t, 8> Idxs(NumElts); 1017 // Lookup the bits for this element, wrapping around the immediate every 1018 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need 1019 // to offset by the first index of each group. 1020 for (unsigned i = 0; i != NumElts; ++i) 1021 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 1022 1023 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1024 1025 if (CI->getNumArgOperands() == 4) 1026 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1027 CI->getArgOperand(2)); 1028 } else if (Name == "llvm.x86.sse2.pshufl.w" || 1029 Name.startswith("llvm.x86.avx512.mask.pshufl.w.")) { 1030 Value *Op0 = CI->getArgOperand(0); 1031 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1032 unsigned NumElts = CI->getType()->getVectorNumElements(); 1033 1034 SmallVector<uint32_t, 16> Idxs(NumElts); 1035 for (unsigned l = 0; l != NumElts; l += 8) { 1036 for (unsigned i = 0; i != 4; ++i) 1037 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 1038 for (unsigned i = 4; i != 8; ++i) 1039 Idxs[i + l] = i + l; 1040 } 1041 1042 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1043 1044 if (CI->getNumArgOperands() == 4) 1045 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1046 CI->getArgOperand(2)); 1047 } else if (Name == "llvm.x86.sse2.pshufh.w" || 1048 Name.startswith("llvm.x86.avx512.mask.pshufh.w.")) { 1049 Value *Op0 = CI->getArgOperand(0); 1050 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1051 unsigned NumElts = CI->getType()->getVectorNumElements(); 1052 1053 SmallVector<uint32_t, 16> Idxs(NumElts); 1054 for (unsigned l = 0; l != NumElts; l += 8) { 1055 for (unsigned i = 0; i != 4; ++i) 1056 Idxs[i + l] = i + l; 1057 for (unsigned i = 0; i != 4; ++i) 1058 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 1059 } 1060 1061 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1062 1063 if (CI->getNumArgOperands() == 4) 1064 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1065 CI->getArgOperand(2)); 1066 } else if (Name.startswith("llvm.x86.avx512.mask.punpckl") || 1067 Name.startswith("llvm.x86.avx512.mask.unpckl.")) { 1068 Value *Op0 = CI->getArgOperand(0); 1069 Value *Op1 = CI->getArgOperand(1); 1070 int NumElts = CI->getType()->getVectorNumElements(); 1071 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1072 1073 SmallVector<uint32_t, 64> Idxs(NumElts); 1074 for (int l = 0; l != NumElts; l += NumLaneElts) 1075 for (int i = 0; i != NumLaneElts; ++i) 1076 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 1077 1078 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1079 1080 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1081 CI->getArgOperand(2)); 1082 } else if (Name.startswith("llvm.x86.avx512.mask.punpckh") || 1083 Name.startswith("llvm.x86.avx512.mask.unpckh.")) { 1084 Value *Op0 = CI->getArgOperand(0); 1085 Value *Op1 = CI->getArgOperand(1); 1086 int NumElts = CI->getType()->getVectorNumElements(); 1087 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1088 1089 SmallVector<uint32_t, 64> Idxs(NumElts); 1090 for (int l = 0; l != NumElts; l += NumLaneElts) 1091 for (int i = 0; i != NumLaneElts; ++i) 1092 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 1093 1094 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1095 1096 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1097 CI->getArgOperand(2)); 1098 } else { 1099 llvm_unreachable("Unknown function for CallInst upgrade."); 1100 } 1101 1102 if (Rep) 1103 CI->replaceAllUsesWith(Rep); 1104 CI->eraseFromParent(); 1105 return; 1106 } 1107 1108 std::string Name = CI->getName(); 1109 if (!Name.empty()) 1110 CI->setName(Name + ".old"); 1111 1112 switch (NewFn->getIntrinsicID()) { 1113 default: 1114 llvm_unreachable("Unknown function for CallInst upgrade."); 1115 1116 case Intrinsic::arm_neon_vld1: 1117 case Intrinsic::arm_neon_vld2: 1118 case Intrinsic::arm_neon_vld3: 1119 case Intrinsic::arm_neon_vld4: 1120 case Intrinsic::arm_neon_vld2lane: 1121 case Intrinsic::arm_neon_vld3lane: 1122 case Intrinsic::arm_neon_vld4lane: 1123 case Intrinsic::arm_neon_vst1: 1124 case Intrinsic::arm_neon_vst2: 1125 case Intrinsic::arm_neon_vst3: 1126 case Intrinsic::arm_neon_vst4: 1127 case Intrinsic::arm_neon_vst2lane: 1128 case Intrinsic::arm_neon_vst3lane: 1129 case Intrinsic::arm_neon_vst4lane: { 1130 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1131 CI->arg_operands().end()); 1132 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 1133 CI->eraseFromParent(); 1134 return; 1135 } 1136 1137 case Intrinsic::ctlz: 1138 case Intrinsic::cttz: 1139 assert(CI->getNumArgOperands() == 1 && 1140 "Mismatch between function args and call args"); 1141 CI->replaceAllUsesWith(Builder.CreateCall( 1142 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name)); 1143 CI->eraseFromParent(); 1144 return; 1145 1146 case Intrinsic::objectsize: 1147 CI->replaceAllUsesWith(Builder.CreateCall( 1148 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); 1149 CI->eraseFromParent(); 1150 return; 1151 1152 case Intrinsic::ctpop: { 1153 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); 1154 CI->eraseFromParent(); 1155 return; 1156 } 1157 1158 case Intrinsic::x86_xop_vfrcz_ss: 1159 case Intrinsic::x86_xop_vfrcz_sd: 1160 CI->replaceAllUsesWith( 1161 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name)); 1162 CI->eraseFromParent(); 1163 return; 1164 1165 case Intrinsic::x86_xop_vpermil2pd: 1166 case Intrinsic::x86_xop_vpermil2ps: 1167 case Intrinsic::x86_xop_vpermil2pd_256: 1168 case Intrinsic::x86_xop_vpermil2ps_256: { 1169 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1170 CI->arg_operands().end()); 1171 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 1172 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 1173 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 1174 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name)); 1175 CI->eraseFromParent(); 1176 return; 1177 } 1178 1179 case Intrinsic::x86_sse41_ptestc: 1180 case Intrinsic::x86_sse41_ptestz: 1181 case Intrinsic::x86_sse41_ptestnzc: { 1182 // The arguments for these intrinsics used to be v4f32, and changed 1183 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 1184 // So, the only thing required is a bitcast for both arguments. 1185 // First, check the arguments have the old type. 1186 Value *Arg0 = CI->getArgOperand(0); 1187 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 1188 return; 1189 1190 // Old intrinsic, add bitcasts 1191 Value *Arg1 = CI->getArgOperand(1); 1192 1193 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 1194 1195 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 1196 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 1197 1198 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name); 1199 CI->replaceAllUsesWith(NewCall); 1200 CI->eraseFromParent(); 1201 return; 1202 } 1203 1204 case Intrinsic::x86_sse41_insertps: 1205 case Intrinsic::x86_sse41_dppd: 1206 case Intrinsic::x86_sse41_dpps: 1207 case Intrinsic::x86_sse41_mpsadbw: 1208 case Intrinsic::x86_avx_dp_ps_256: 1209 case Intrinsic::x86_avx2_mpsadbw: { 1210 // Need to truncate the last argument from i32 to i8 -- this argument models 1211 // an inherently 8-bit immediate operand to these x86 instructions. 1212 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1213 CI->arg_operands().end()); 1214 1215 // Replace the last argument with a trunc. 1216 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 1217 1218 CallInst *NewCall = Builder.CreateCall(NewFn, Args); 1219 CI->replaceAllUsesWith(NewCall); 1220 CI->eraseFromParent(); 1221 return; 1222 } 1223 1224 case Intrinsic::thread_pointer: { 1225 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {})); 1226 CI->eraseFromParent(); 1227 return; 1228 } 1229 1230 case Intrinsic::masked_load: 1231 case Intrinsic::masked_store: { 1232 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 1233 CI->arg_operands().end()); 1234 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 1235 CI->eraseFromParent(); 1236 return; 1237 } 1238 } 1239 } 1240 1241 void llvm::UpgradeCallsToIntrinsic(Function *F) { 1242 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 1243 1244 // Check if this function should be upgraded and get the replacement function 1245 // if there is one. 1246 Function *NewFn; 1247 if (UpgradeIntrinsicFunction(F, NewFn)) { 1248 // Replace all users of the old function with the new function or new 1249 // instructions. This is not a range loop because the call is deleted. 1250 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 1251 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 1252 UpgradeIntrinsicCall(CI, NewFn); 1253 1254 // Remove old function, no longer used, from the module. 1255 F->eraseFromParent(); 1256 } 1257 } 1258 1259 void llvm::UpgradeInstWithTBAATag(Instruction *I) { 1260 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); 1261 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); 1262 // Check if the tag uses struct-path aware TBAA format. 1263 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3) 1264 return; 1265 1266 if (MD->getNumOperands() == 3) { 1267 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)}; 1268 MDNode *ScalarType = MDNode::get(I->getContext(), Elts); 1269 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 1270 Metadata *Elts2[] = {ScalarType, ScalarType, 1271 ConstantAsMetadata::get(Constant::getNullValue( 1272 Type::getInt64Ty(I->getContext()))), 1273 MD->getOperand(2)}; 1274 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); 1275 } else { 1276 // Create a MDNode <MD, MD, offset 0> 1277 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue( 1278 Type::getInt64Ty(I->getContext())))}; 1279 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); 1280 } 1281 } 1282 1283 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 1284 Instruction *&Temp) { 1285 if (Opc != Instruction::BitCast) 1286 return nullptr; 1287 1288 Temp = nullptr; 1289 Type *SrcTy = V->getType(); 1290 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 1291 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 1292 LLVMContext &Context = V->getContext(); 1293 1294 // We have no information about target data layout, so we assume that 1295 // the maximum pointer size is 64bit. 1296 Type *MidTy = Type::getInt64Ty(Context); 1297 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 1298 1299 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 1300 } 1301 1302 return nullptr; 1303 } 1304 1305 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 1306 if (Opc != Instruction::BitCast) 1307 return nullptr; 1308 1309 Type *SrcTy = C->getType(); 1310 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 1311 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 1312 LLVMContext &Context = C->getContext(); 1313 1314 // We have no information about target data layout, so we assume that 1315 // the maximum pointer size is 64bit. 1316 Type *MidTy = Type::getInt64Ty(Context); 1317 1318 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 1319 DestTy); 1320 } 1321 1322 return nullptr; 1323 } 1324 1325 /// Check the debug info version number, if it is out-dated, drop the debug 1326 /// info. Return true if module is modified. 1327 bool llvm::UpgradeDebugInfo(Module &M) { 1328 unsigned Version = getDebugMetadataVersionFromModule(M); 1329 if (Version == DEBUG_METADATA_VERSION) 1330 return false; 1331 1332 bool RetCode = StripDebugInfo(M); 1333 if (RetCode) { 1334 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 1335 M.getContext().diagnose(DiagVersion); 1336 } 1337 return RetCode; 1338 } 1339 1340 bool llvm::UpgradeModuleFlags(Module &M) { 1341 const NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 1342 if (!ModFlags) 1343 return false; 1344 1345 bool HasObjCFlag = false, HasClassProperties = false; 1346 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { 1347 MDNode *Op = ModFlags->getOperand(I); 1348 if (Op->getNumOperands() < 2) 1349 continue; 1350 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); 1351 if (!ID) 1352 continue; 1353 if (ID->getString() == "Objective-C Image Info Version") 1354 HasObjCFlag = true; 1355 if (ID->getString() == "Objective-C Class Properties") 1356 HasClassProperties = true; 1357 } 1358 // "Objective-C Class Properties" is recently added for Objective-C. We 1359 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 1360 // flag of value 0, so we can correclty report error when trying to link 1361 // an ObjC bitcode without this module flag with an ObjC bitcode with this 1362 // module flag. 1363 if (HasObjCFlag && !HasClassProperties) { 1364 M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", 1365 (uint32_t)0); 1366 return true; 1367 } 1368 return false; 1369 } 1370 1371 static bool isOldLoopArgument(Metadata *MD) { 1372 auto *T = dyn_cast_or_null<MDTuple>(MD); 1373 if (!T) 1374 return false; 1375 if (T->getNumOperands() < 1) 1376 return false; 1377 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 1378 if (!S) 1379 return false; 1380 return S->getString().startswith("llvm.vectorizer."); 1381 } 1382 1383 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 1384 StringRef OldPrefix = "llvm.vectorizer."; 1385 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 1386 1387 if (OldTag == "llvm.vectorizer.unroll") 1388 return MDString::get(C, "llvm.loop.interleave.count"); 1389 1390 return MDString::get( 1391 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 1392 .str()); 1393 } 1394 1395 static Metadata *upgradeLoopArgument(Metadata *MD) { 1396 auto *T = dyn_cast_or_null<MDTuple>(MD); 1397 if (!T) 1398 return MD; 1399 if (T->getNumOperands() < 1) 1400 return MD; 1401 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 1402 if (!OldTag) 1403 return MD; 1404 if (!OldTag->getString().startswith("llvm.vectorizer.")) 1405 return MD; 1406 1407 // This has an old tag. Upgrade it. 1408 SmallVector<Metadata *, 8> Ops; 1409 Ops.reserve(T->getNumOperands()); 1410 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 1411 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 1412 Ops.push_back(T->getOperand(I)); 1413 1414 return MDTuple::get(T->getContext(), Ops); 1415 } 1416 1417 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 1418 auto *T = dyn_cast<MDTuple>(&N); 1419 if (!T) 1420 return &N; 1421 1422 if (!llvm::any_of(T->operands(), isOldLoopArgument)) 1423 return &N; 1424 1425 SmallVector<Metadata *, 8> Ops; 1426 Ops.reserve(T->getNumOperands()); 1427 for (Metadata *MD : T->operands()) 1428 Ops.push_back(upgradeLoopArgument(MD)); 1429 1430 return MDTuple::get(T->getContext(), Ops); 1431 } 1432