1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 // Upgrade the declarations of the SSE4.1 functions whose arguments have 35 // changed their type from v4f32 to v2i64. 36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 37 Function *&NewFn) { 38 // Check whether this is an old version of the function, which received 39 // v4f32 arguments. 40 Type *Arg0Type = F->getFunctionType()->getParamType(0); 41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 42 return false; 43 44 // Yes, it's old, replace it with new version. 45 F->setName(F->getName() + ".old"); 46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 47 return true; 48 } 49 50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 51 // arguments have changed their type from i32 to i8. 52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 53 Function *&NewFn) { 54 // Check that the last argument is an i32. 55 Type *LastArgType = F->getFunctionType()->getParamType( 56 F->getFunctionType()->getNumParams() - 1); 57 if (!LastArgType->isIntegerTy(32)) 58 return false; 59 60 // Move this function aside and map down. 61 F->setName(F->getName() + ".old"); 62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 63 return true; 64 } 65 66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 67 assert(F && "Illegal to upgrade a non-existent Function."); 68 69 // Quickly eliminate it, if it's not a candidate. 70 StringRef Name = F->getName(); 71 if (Name.size() <= 8 || !Name.startswith("llvm.")) 72 return false; 73 Name = Name.substr(5); // Strip off "llvm." 74 75 switch (Name[0]) { 76 default: break; 77 case 'a': { 78 if (Name.startswith("arm.neon.vclz")) { 79 Type* args[2] = { 80 F->arg_begin()->getType(), 81 Type::getInt1Ty(F->getContext()) 82 }; 83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 84 // the end of the name. Change name from llvm.arm.neon.vclz.* to 85 // llvm.ctlz.* 86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 87 NewFn = Function::Create(fType, F->getLinkage(), 88 "llvm.ctlz." + Name.substr(14), F->getParent()); 89 return true; 90 } 91 if (Name.startswith("arm.neon.vcnt")) { 92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 93 F->arg_begin()->getType()); 94 return true; 95 } 96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 97 if (vldRegex.match(Name)) { 98 auto fArgs = F->getFunctionType()->params(); 99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 100 // Can't use Intrinsic::getDeclaration here as the return types might 101 // then only be structurally equal. 102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 103 NewFn = Function::Create(fType, F->getLinkage(), 104 "llvm." + Name + ".p0i8", F->getParent()); 105 return true; 106 } 107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 108 if (vstRegex.match(Name)) { 109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 110 Intrinsic::arm_neon_vst2, 111 Intrinsic::arm_neon_vst3, 112 Intrinsic::arm_neon_vst4}; 113 114 static const Intrinsic::ID StoreLaneInts[] = { 115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 116 Intrinsic::arm_neon_vst4lane 117 }; 118 119 auto fArgs = F->getFunctionType()->params(); 120 Type *Tys[] = {fArgs[0], fArgs[1]}; 121 if (Name.find("lane") == StringRef::npos) 122 NewFn = Intrinsic::getDeclaration(F->getParent(), 123 StoreInts[fArgs.size() - 3], Tys); 124 else 125 NewFn = Intrinsic::getDeclaration(F->getParent(), 126 StoreLaneInts[fArgs.size() - 5], Tys); 127 return true; 128 } 129 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 130 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 131 return true; 132 } 133 break; 134 } 135 136 case 'c': { 137 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 138 F->setName(Name + ".old"); 139 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 140 F->arg_begin()->getType()); 141 return true; 142 } 143 if (Name.startswith("cttz.") && F->arg_size() == 1) { 144 F->setName(Name + ".old"); 145 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 146 F->arg_begin()->getType()); 147 return true; 148 } 149 break; 150 } 151 152 case 'o': 153 // We only need to change the name to match the mangling including the 154 // address space. 155 if (F->arg_size() == 2 && Name.startswith("objectsize.")) { 156 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 157 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 158 F->setName(Name + ".old"); 159 NewFn = Intrinsic::getDeclaration(F->getParent(), 160 Intrinsic::objectsize, Tys); 161 return true; 162 } 163 } 164 break; 165 166 case 's': 167 if (Name == "stackprotectorcheck") { 168 NewFn = nullptr; 169 return true; 170 } 171 172 case 'x': { 173 if (Name.startswith("x86.sse2.pcmpeq.") || 174 Name.startswith("x86.sse2.pcmpgt.") || 175 Name.startswith("x86.avx2.pcmpeq.") || 176 Name.startswith("x86.avx2.pcmpgt.") || 177 Name.startswith("x86.avx2.vbroadcast") || 178 Name.startswith("x86.avx2.pbroadcast") || 179 Name.startswith("x86.avx.vpermil.") || 180 Name.startswith("x86.sse41.pmovsx") || 181 Name == "x86.avx.vinsertf128.pd.256" || 182 Name == "x86.avx.vinsertf128.ps.256" || 183 Name == "x86.avx.vinsertf128.si.256" || 184 Name == "x86.avx2.vinserti128" || 185 Name == "x86.avx.vextractf128.pd.256" || 186 Name == "x86.avx.vextractf128.ps.256" || 187 Name == "x86.avx.vextractf128.si.256" || 188 Name == "x86.avx2.vextracti128" || 189 Name == "x86.avx.movnt.dq.256" || 190 Name == "x86.avx.movnt.pd.256" || 191 Name == "x86.avx.movnt.ps.256" || 192 Name == "x86.sse42.crc32.64.8" || 193 Name == "x86.avx.vbroadcast.ss" || 194 Name == "x86.avx.vbroadcast.ss.256" || 195 Name == "x86.avx.vbroadcast.sd.256" || 196 Name == "x86.sse2.psll.dq" || 197 Name == "x86.sse2.psrl.dq" || 198 Name == "x86.avx2.psll.dq" || 199 Name == "x86.avx2.psrl.dq" || 200 Name == "x86.sse2.psll.dq.bs" || 201 Name == "x86.sse2.psrl.dq.bs" || 202 Name == "x86.avx2.psll.dq.bs" || 203 Name == "x86.avx2.psrl.dq.bs" || 204 Name == "x86.sse41.pblendw" || 205 Name == "x86.sse41.blendpd" || 206 Name == "x86.sse41.blendps" || 207 Name == "x86.avx.blend.pd.256" || 208 Name == "x86.avx.blend.ps.256" || 209 Name == "x86.avx2.pblendw" || 210 Name == "x86.avx2.pblendd.128" || 211 Name == "x86.avx2.pblendd.256" || 212 Name == "x86.avx2.vbroadcasti128" || 213 Name == "x86.xop.vpcmov" || 214 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { 215 NewFn = nullptr; 216 return true; 217 } 218 // SSE4.1 ptest functions may have an old signature. 219 if (Name.startswith("x86.sse41.ptest")) { 220 if (Name == "x86.sse41.ptestc") 221 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); 222 if (Name == "x86.sse41.ptestz") 223 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); 224 if (Name == "x86.sse41.ptestnzc") 225 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 226 } 227 // Several blend and other instructions with masks used the wrong number of 228 // bits. 229 if (Name == "x86.sse41.insertps") 230 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 231 NewFn); 232 if (Name == "x86.sse41.dppd") 233 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 234 NewFn); 235 if (Name == "x86.sse41.dpps") 236 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 237 NewFn); 238 if (Name == "x86.sse41.mpsadbw") 239 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 240 NewFn); 241 if (Name == "x86.avx.dp.ps.256") 242 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 243 NewFn); 244 if (Name == "x86.avx2.mpsadbw") 245 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 246 NewFn); 247 248 // frcz.ss/sd may need to have an argument dropped 249 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { 250 F->setName(Name + ".old"); 251 NewFn = Intrinsic::getDeclaration(F->getParent(), 252 Intrinsic::x86_xop_vfrcz_ss); 253 return true; 254 } 255 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { 256 F->setName(Name + ".old"); 257 NewFn = Intrinsic::getDeclaration(F->getParent(), 258 Intrinsic::x86_xop_vfrcz_sd); 259 return true; 260 } 261 // Fix the FMA4 intrinsics to remove the 4 262 if (Name.startswith("x86.fma4.")) { 263 F->setName("llvm.x86.fma" + Name.substr(8)); 264 NewFn = F; 265 return true; 266 } 267 break; 268 } 269 } 270 271 // This may not belong here. This function is effectively being overloaded 272 // to both detect an intrinsic which needs upgrading, and to provide the 273 // upgraded form of the intrinsic. We should perhaps have two separate 274 // functions for this. 275 return false; 276 } 277 278 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 279 NewFn = nullptr; 280 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 281 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 282 283 // Upgrade intrinsic attributes. This does not change the function. 284 if (NewFn) 285 F = NewFn; 286 if (Intrinsic::ID id = F->getIntrinsicID()) 287 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 288 return Upgraded; 289 } 290 291 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 292 // Nothing to do yet. 293 return false; 294 } 295 296 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them 297 // to byte shuffles. 298 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 299 Value *Op, unsigned NumLanes, 300 unsigned Shift) { 301 // Each lane is 16 bytes. 302 unsigned NumElts = NumLanes * 16; 303 304 // Bitcast from a 64-bit element type to a byte element type. 305 Op = Builder.CreateBitCast(Op, 306 VectorType::get(Type::getInt8Ty(C), NumElts), 307 "cast"); 308 // We'll be shuffling in zeroes. 309 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 310 311 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 312 // we'll just return the zero vector. 313 if (Shift < 16) { 314 SmallVector<Constant*, 32> Idxs; 315 // 256-bit version is split into two 16-byte lanes. 316 for (unsigned l = 0; l != NumElts; l += 16) 317 for (unsigned i = 0; i != 16; ++i) { 318 unsigned Idx = NumElts + i - Shift; 319 if (Idx < NumElts) 320 Idx -= NumElts - 16; // end of lane, switch operand. 321 Idxs.push_back(Builder.getInt32(Idx + l)); 322 } 323 324 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs)); 325 } 326 327 // Bitcast back to a 64-bit element type. 328 return Builder.CreateBitCast(Res, 329 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 330 "cast"); 331 } 332 333 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them 334 // to byte shuffles. 335 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 336 Value *Op, unsigned NumLanes, 337 unsigned Shift) { 338 // Each lane is 16 bytes. 339 unsigned NumElts = NumLanes * 16; 340 341 // Bitcast from a 64-bit element type to a byte element type. 342 Op = Builder.CreateBitCast(Op, 343 VectorType::get(Type::getInt8Ty(C), NumElts), 344 "cast"); 345 // We'll be shuffling in zeroes. 346 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 347 348 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 349 // we'll just return the zero vector. 350 if (Shift < 16) { 351 SmallVector<Constant*, 32> Idxs; 352 // 256-bit version is split into two 16-byte lanes. 353 for (unsigned l = 0; l != NumElts; l += 16) 354 for (unsigned i = 0; i != 16; ++i) { 355 unsigned Idx = i + Shift; 356 if (Idx >= 16) 357 Idx += NumElts - 16; // end of lane, switch operand. 358 Idxs.push_back(Builder.getInt32(Idx + l)); 359 } 360 361 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs)); 362 } 363 364 // Bitcast back to a 64-bit element type. 365 return Builder.CreateBitCast(Res, 366 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 367 "cast"); 368 } 369 370 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 371 // upgraded intrinsic. All argument and return casting must be provided in 372 // order to seamlessly integrate with existing context. 373 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 374 Function *F = CI->getCalledFunction(); 375 LLVMContext &C = CI->getContext(); 376 IRBuilder<> Builder(C); 377 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 378 379 assert(F && "Intrinsic call is not direct?"); 380 381 if (!NewFn) { 382 // Get the Function's name. 383 StringRef Name = F->getName(); 384 385 Value *Rep; 386 // Upgrade packed integer vector compares intrinsics to compare instructions 387 if (Name.startswith("llvm.x86.sse2.pcmpeq.") || 388 Name.startswith("llvm.x86.avx2.pcmpeq.")) { 389 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1), 390 "pcmpeq"); 391 // need to sign extend since icmp returns vector of i1 392 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 393 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") || 394 Name.startswith("llvm.x86.avx2.pcmpgt.")) { 395 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), 396 "pcmpgt"); 397 // need to sign extend since icmp returns vector of i1 398 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 399 } else if (Name == "llvm.x86.avx.movnt.dq.256" || 400 Name == "llvm.x86.avx.movnt.ps.256" || 401 Name == "llvm.x86.avx.movnt.pd.256") { 402 IRBuilder<> Builder(C); 403 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 404 405 Module *M = F->getParent(); 406 SmallVector<Metadata *, 1> Elts; 407 Elts.push_back( 408 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 409 MDNode *Node = MDNode::get(C, Elts); 410 411 Value *Arg0 = CI->getArgOperand(0); 412 Value *Arg1 = CI->getArgOperand(1); 413 414 // Convert the type of the pointer to a pointer to the stored type. 415 Value *BC = Builder.CreateBitCast(Arg0, 416 PointerType::getUnqual(Arg1->getType()), 417 "cast"); 418 StoreInst *SI = Builder.CreateStore(Arg1, BC); 419 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 420 SI->setAlignment(32); 421 422 // Remove intrinsic. 423 CI->eraseFromParent(); 424 return; 425 } else if (Name.startswith("llvm.x86.xop.vpcom")) { 426 Intrinsic::ID intID; 427 if (Name.endswith("ub")) 428 intID = Intrinsic::x86_xop_vpcomub; 429 else if (Name.endswith("uw")) 430 intID = Intrinsic::x86_xop_vpcomuw; 431 else if (Name.endswith("ud")) 432 intID = Intrinsic::x86_xop_vpcomud; 433 else if (Name.endswith("uq")) 434 intID = Intrinsic::x86_xop_vpcomuq; 435 else if (Name.endswith("b")) 436 intID = Intrinsic::x86_xop_vpcomb; 437 else if (Name.endswith("w")) 438 intID = Intrinsic::x86_xop_vpcomw; 439 else if (Name.endswith("d")) 440 intID = Intrinsic::x86_xop_vpcomd; 441 else if (Name.endswith("q")) 442 intID = Intrinsic::x86_xop_vpcomq; 443 else 444 llvm_unreachable("Unknown suffix"); 445 446 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" 447 unsigned Imm; 448 if (Name.startswith("lt")) 449 Imm = 0; 450 else if (Name.startswith("le")) 451 Imm = 1; 452 else if (Name.startswith("gt")) 453 Imm = 2; 454 else if (Name.startswith("ge")) 455 Imm = 3; 456 else if (Name.startswith("eq")) 457 Imm = 4; 458 else if (Name.startswith("ne")) 459 Imm = 5; 460 else if (Name.startswith("false")) 461 Imm = 6; 462 else if (Name.startswith("true")) 463 Imm = 7; 464 else 465 llvm_unreachable("Unknown condition"); 466 467 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 468 Rep = 469 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 470 Builder.getInt8(Imm)}); 471 } else if (Name == "llvm.x86.xop.vpcmov") { 472 Value *Arg0 = CI->getArgOperand(0); 473 Value *Arg1 = CI->getArgOperand(1); 474 Value *Sel = CI->getArgOperand(2); 475 unsigned NumElts = CI->getType()->getVectorNumElements(); 476 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1)); 477 Value *NotSel = Builder.CreateXor(Sel, MinusOne); 478 Value *Sel0 = Builder.CreateAnd(Arg0, Sel); 479 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel); 480 Rep = Builder.CreateOr(Sel0, Sel1); 481 } else if (Name == "llvm.x86.sse42.crc32.64.8") { 482 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 483 Intrinsic::x86_sse42_crc32_32_8); 484 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 485 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 486 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 487 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { 488 // Replace broadcasts with a series of insertelements. 489 Type *VecTy = CI->getType(); 490 Type *EltTy = VecTy->getVectorElementType(); 491 unsigned EltNum = VecTy->getVectorNumElements(); 492 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 493 EltTy->getPointerTo()); 494 Value *Load = Builder.CreateLoad(EltTy, Cast); 495 Type *I32Ty = Type::getInt32Ty(C); 496 Rep = UndefValue::get(VecTy); 497 for (unsigned I = 0; I < EltNum; ++I) 498 Rep = Builder.CreateInsertElement(Rep, Load, 499 ConstantInt::get(I32Ty, I)); 500 } else if (Name.startswith("llvm.x86.sse41.pmovsx")) { 501 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 502 VectorType *DstTy = cast<VectorType>(CI->getType()); 503 unsigned NumDstElts = DstTy->getNumElements(); 504 505 // Extract a subvector of the first NumDstElts lanes and sign extend. 506 SmallVector<int, 8> ShuffleMask; 507 for (int i = 0; i != (int)NumDstElts; ++i) 508 ShuffleMask.push_back(i); 509 510 Value *SV = Builder.CreateShuffleVector( 511 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 512 Rep = Builder.CreateSExt(SV, DstTy); 513 } else if (Name == "llvm.x86.avx2.vbroadcasti128") { 514 // Replace vbroadcasts with a vector shuffle. 515 Type *VT = VectorType::get(Type::getInt64Ty(C), 2); 516 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 517 PointerType::getUnqual(VT)); 518 Value *Load = Builder.CreateLoad(VT, Op); 519 const int Idxs[4] = { 0, 1, 0, 1 }; 520 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 521 Idxs); 522 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") || 523 Name.startswith("llvm.x86.avx2.vbroadcast")) { 524 // Replace vp?broadcasts with a vector shuffle. 525 Value *Op = CI->getArgOperand(0); 526 unsigned NumElts = CI->getType()->getVectorNumElements(); 527 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 528 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 529 Constant::getNullValue(MaskTy)); 530 } else if (Name == "llvm.x86.sse2.psll.dq") { 531 // 128-bit shift left specified in bits. 532 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 533 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 534 Shift / 8); // Shift is in bits. 535 } else if (Name == "llvm.x86.sse2.psrl.dq") { 536 // 128-bit shift right specified in bits. 537 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 538 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 539 Shift / 8); // Shift is in bits. 540 } else if (Name == "llvm.x86.avx2.psll.dq") { 541 // 256-bit shift left specified in bits. 542 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 543 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 544 Shift / 8); // Shift is in bits. 545 } else if (Name == "llvm.x86.avx2.psrl.dq") { 546 // 256-bit shift right specified in bits. 547 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 548 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 549 Shift / 8); // Shift is in bits. 550 } else if (Name == "llvm.x86.sse2.psll.dq.bs") { 551 // 128-bit shift left specified in bytes. 552 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 553 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 554 Shift); 555 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") { 556 // 128-bit shift right specified in bytes. 557 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 558 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 559 Shift); 560 } else if (Name == "llvm.x86.avx2.psll.dq.bs") { 561 // 256-bit shift left specified in bytes. 562 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 563 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 564 Shift); 565 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") { 566 // 256-bit shift right specified in bytes. 567 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 568 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 569 Shift); 570 } else if (Name == "llvm.x86.sse41.pblendw" || 571 Name == "llvm.x86.sse41.blendpd" || 572 Name == "llvm.x86.sse41.blendps" || 573 Name == "llvm.x86.avx.blend.pd.256" || 574 Name == "llvm.x86.avx.blend.ps.256" || 575 Name == "llvm.x86.avx2.pblendw" || 576 Name == "llvm.x86.avx2.pblendd.128" || 577 Name == "llvm.x86.avx2.pblendd.256") { 578 Value *Op0 = CI->getArgOperand(0); 579 Value *Op1 = CI->getArgOperand(1); 580 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 581 VectorType *VecTy = cast<VectorType>(CI->getType()); 582 unsigned NumElts = VecTy->getNumElements(); 583 584 SmallVector<Constant*, 16> Idxs; 585 for (unsigned i = 0; i != NumElts; ++i) { 586 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 587 Idxs.push_back(Builder.getInt32(Idx)); 588 } 589 590 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); 591 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" || 592 Name == "llvm.x86.avx.vinsertf128.ps.256" || 593 Name == "llvm.x86.avx.vinsertf128.si.256" || 594 Name == "llvm.x86.avx2.vinserti128") { 595 Value *Op0 = CI->getArgOperand(0); 596 Value *Op1 = CI->getArgOperand(1); 597 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 598 VectorType *VecTy = cast<VectorType>(CI->getType()); 599 unsigned NumElts = VecTy->getNumElements(); 600 601 // Mask off the high bits of the immediate value; hardware ignores those. 602 Imm = Imm & 1; 603 604 // Extend the second operand into a vector that is twice as big. 605 Value *UndefV = UndefValue::get(Op1->getType()); 606 SmallVector<Constant*, 8> Idxs; 607 for (unsigned i = 0; i != NumElts; ++i) { 608 Idxs.push_back(Builder.getInt32(i)); 609 } 610 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs)); 611 612 // Insert the second operand into the first operand. 613 614 // Note that there is no guarantee that instruction lowering will actually 615 // produce a vinsertf128 instruction for the created shuffles. In 616 // particular, the 0 immediate case involves no lane changes, so it can 617 // be handled as a blend. 618 619 // Example of shuffle mask for 32-bit elements: 620 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 621 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 622 623 SmallVector<Constant*, 8> Idxs2; 624 // The low half of the result is either the low half of the 1st operand 625 // or the low half of the 2nd operand (the inserted vector). 626 for (unsigned i = 0; i != NumElts / 2; ++i) { 627 unsigned Idx = Imm ? i : (i + NumElts); 628 Idxs2.push_back(Builder.getInt32(Idx)); 629 } 630 // The high half of the result is either the low half of the 2nd operand 631 // (the inserted vector) or the high half of the 1st operand. 632 for (unsigned i = NumElts / 2; i != NumElts; ++i) { 633 unsigned Idx = Imm ? (i + NumElts / 2) : i; 634 Idxs2.push_back(Builder.getInt32(Idx)); 635 } 636 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2)); 637 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" || 638 Name == "llvm.x86.avx.vextractf128.ps.256" || 639 Name == "llvm.x86.avx.vextractf128.si.256" || 640 Name == "llvm.x86.avx2.vextracti128") { 641 Value *Op0 = CI->getArgOperand(0); 642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 643 VectorType *VecTy = cast<VectorType>(CI->getType()); 644 unsigned NumElts = VecTy->getNumElements(); 645 646 // Mask off the high bits of the immediate value; hardware ignores those. 647 Imm = Imm & 1; 648 649 // Get indexes for either the high half or low half of the input vector. 650 SmallVector<Constant*, 4> Idxs(NumElts); 651 for (unsigned i = 0; i != NumElts; ++i) { 652 unsigned Idx = Imm ? (i + NumElts) : i; 653 Idxs[i] = Builder.getInt32(Idx); 654 } 655 656 Value *UndefV = UndefValue::get(Op0->getType()); 657 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs)); 658 } else if (Name == "llvm.stackprotectorcheck") { 659 Rep = nullptr; 660 } else { 661 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; 662 if (Name == "llvm.x86.avx.vpermil.pd.256") 663 PD256 = true; 664 else if (Name == "llvm.x86.avx.vpermil.pd") 665 PD128 = true; 666 else if (Name == "llvm.x86.avx.vpermil.ps.256") 667 PS256 = true; 668 else if (Name == "llvm.x86.avx.vpermil.ps") 669 PS128 = true; 670 671 if (PD256 || PD128 || PS256 || PS128) { 672 Value *Op0 = CI->getArgOperand(0); 673 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 674 SmallVector<Constant*, 8> Idxs; 675 676 if (PD128) 677 for (unsigned i = 0; i != 2; ++i) 678 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1)); 679 else if (PD256) 680 for (unsigned l = 0; l != 4; l+=2) 681 for (unsigned i = 0; i != 2; ++i) 682 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l)); 683 else if (PS128) 684 for (unsigned i = 0; i != 4; ++i) 685 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3)); 686 else if (PS256) 687 for (unsigned l = 0; l != 8; l+=4) 688 for (unsigned i = 0; i != 4; ++i) 689 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l)); 690 else 691 llvm_unreachable("Unexpected function"); 692 693 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs)); 694 } else { 695 llvm_unreachable("Unknown function for CallInst upgrade."); 696 } 697 } 698 699 if (Rep) 700 CI->replaceAllUsesWith(Rep); 701 CI->eraseFromParent(); 702 return; 703 } 704 705 std::string Name = CI->getName(); 706 if (!Name.empty()) 707 CI->setName(Name + ".old"); 708 709 switch (NewFn->getIntrinsicID()) { 710 default: 711 llvm_unreachable("Unknown function for CallInst upgrade."); 712 713 case Intrinsic::arm_neon_vld1: 714 case Intrinsic::arm_neon_vld2: 715 case Intrinsic::arm_neon_vld3: 716 case Intrinsic::arm_neon_vld4: 717 case Intrinsic::arm_neon_vld2lane: 718 case Intrinsic::arm_neon_vld3lane: 719 case Intrinsic::arm_neon_vld4lane: 720 case Intrinsic::arm_neon_vst1: 721 case Intrinsic::arm_neon_vst2: 722 case Intrinsic::arm_neon_vst3: 723 case Intrinsic::arm_neon_vst4: 724 case Intrinsic::arm_neon_vst2lane: 725 case Intrinsic::arm_neon_vst3lane: 726 case Intrinsic::arm_neon_vst4lane: { 727 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 728 CI->arg_operands().end()); 729 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 730 CI->eraseFromParent(); 731 return; 732 } 733 734 case Intrinsic::ctlz: 735 case Intrinsic::cttz: 736 assert(CI->getNumArgOperands() == 1 && 737 "Mismatch between function args and call args"); 738 CI->replaceAllUsesWith(Builder.CreateCall( 739 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name)); 740 CI->eraseFromParent(); 741 return; 742 743 case Intrinsic::objectsize: 744 CI->replaceAllUsesWith(Builder.CreateCall( 745 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); 746 CI->eraseFromParent(); 747 return; 748 749 case Intrinsic::ctpop: { 750 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); 751 CI->eraseFromParent(); 752 return; 753 } 754 755 case Intrinsic::x86_xop_vfrcz_ss: 756 case Intrinsic::x86_xop_vfrcz_sd: 757 CI->replaceAllUsesWith( 758 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name)); 759 CI->eraseFromParent(); 760 return; 761 762 case Intrinsic::x86_sse41_ptestc: 763 case Intrinsic::x86_sse41_ptestz: 764 case Intrinsic::x86_sse41_ptestnzc: { 765 // The arguments for these intrinsics used to be v4f32, and changed 766 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 767 // So, the only thing required is a bitcast for both arguments. 768 // First, check the arguments have the old type. 769 Value *Arg0 = CI->getArgOperand(0); 770 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 771 return; 772 773 // Old intrinsic, add bitcasts 774 Value *Arg1 = CI->getArgOperand(1); 775 776 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 777 778 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 779 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 780 781 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name); 782 CI->replaceAllUsesWith(NewCall); 783 CI->eraseFromParent(); 784 return; 785 } 786 787 case Intrinsic::x86_sse41_insertps: 788 case Intrinsic::x86_sse41_dppd: 789 case Intrinsic::x86_sse41_dpps: 790 case Intrinsic::x86_sse41_mpsadbw: 791 case Intrinsic::x86_avx_dp_ps_256: 792 case Intrinsic::x86_avx2_mpsadbw: { 793 // Need to truncate the last argument from i32 to i8 -- this argument models 794 // an inherently 8-bit immediate operand to these x86 instructions. 795 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 796 CI->arg_operands().end()); 797 798 // Replace the last argument with a trunc. 799 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 800 801 CallInst *NewCall = Builder.CreateCall(NewFn, Args); 802 CI->replaceAllUsesWith(NewCall); 803 CI->eraseFromParent(); 804 return; 805 } 806 807 case Intrinsic::thread_pointer: { 808 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {})); 809 CI->eraseFromParent(); 810 return; 811 } 812 } 813 } 814 815 void llvm::UpgradeCallsToIntrinsic(Function *F) { 816 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 817 818 // Check if this function should be upgraded and get the replacement function 819 // if there is one. 820 Function *NewFn; 821 if (UpgradeIntrinsicFunction(F, NewFn)) { 822 // Replace all users of the old function with the new function or new 823 // instructions. This is not a range loop because the call is deleted. 824 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 825 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 826 UpgradeIntrinsicCall(CI, NewFn); 827 828 // Remove old function, no longer used, from the module. 829 F->eraseFromParent(); 830 } 831 } 832 833 void llvm::UpgradeInstWithTBAATag(Instruction *I) { 834 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); 835 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); 836 // Check if the tag uses struct-path aware TBAA format. 837 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3) 838 return; 839 840 if (MD->getNumOperands() == 3) { 841 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)}; 842 MDNode *ScalarType = MDNode::get(I->getContext(), Elts); 843 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 844 Metadata *Elts2[] = {ScalarType, ScalarType, 845 ConstantAsMetadata::get(Constant::getNullValue( 846 Type::getInt64Ty(I->getContext()))), 847 MD->getOperand(2)}; 848 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); 849 } else { 850 // Create a MDNode <MD, MD, offset 0> 851 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue( 852 Type::getInt64Ty(I->getContext())))}; 853 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); 854 } 855 } 856 857 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 858 Instruction *&Temp) { 859 if (Opc != Instruction::BitCast) 860 return nullptr; 861 862 Temp = nullptr; 863 Type *SrcTy = V->getType(); 864 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 865 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 866 LLVMContext &Context = V->getContext(); 867 868 // We have no information about target data layout, so we assume that 869 // the maximum pointer size is 64bit. 870 Type *MidTy = Type::getInt64Ty(Context); 871 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 872 873 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 874 } 875 876 return nullptr; 877 } 878 879 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 880 if (Opc != Instruction::BitCast) 881 return nullptr; 882 883 Type *SrcTy = C->getType(); 884 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 885 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 886 LLVMContext &Context = C->getContext(); 887 888 // We have no information about target data layout, so we assume that 889 // the maximum pointer size is 64bit. 890 Type *MidTy = Type::getInt64Ty(Context); 891 892 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 893 DestTy); 894 } 895 896 return nullptr; 897 } 898 899 /// Check the debug info version number, if it is out-dated, drop the debug 900 /// info. Return true if module is modified. 901 bool llvm::UpgradeDebugInfo(Module &M) { 902 unsigned Version = getDebugMetadataVersionFromModule(M); 903 if (Version == DEBUG_METADATA_VERSION) 904 return false; 905 906 bool RetCode = StripDebugInfo(M); 907 if (RetCode) { 908 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 909 M.getContext().diagnose(DiagVersion); 910 } 911 return RetCode; 912 } 913 914 static bool isOldLoopArgument(Metadata *MD) { 915 auto *T = dyn_cast_or_null<MDTuple>(MD); 916 if (!T) 917 return false; 918 if (T->getNumOperands() < 1) 919 return false; 920 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 921 if (!S) 922 return false; 923 return S->getString().startswith("llvm.vectorizer."); 924 } 925 926 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 927 StringRef OldPrefix = "llvm.vectorizer."; 928 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 929 930 if (OldTag == "llvm.vectorizer.unroll") 931 return MDString::get(C, "llvm.loop.interleave.count"); 932 933 return MDString::get( 934 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 935 .str()); 936 } 937 938 static Metadata *upgradeLoopArgument(Metadata *MD) { 939 auto *T = dyn_cast_or_null<MDTuple>(MD); 940 if (!T) 941 return MD; 942 if (T->getNumOperands() < 1) 943 return MD; 944 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 945 if (!OldTag) 946 return MD; 947 if (!OldTag->getString().startswith("llvm.vectorizer.")) 948 return MD; 949 950 // This has an old tag. Upgrade it. 951 SmallVector<Metadata *, 8> Ops; 952 Ops.reserve(T->getNumOperands()); 953 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 954 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 955 Ops.push_back(T->getOperand(I)); 956 957 return MDTuple::get(T->getContext(), Ops); 958 } 959 960 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 961 auto *T = dyn_cast<MDTuple>(&N); 962 if (!T) 963 return &N; 964 965 if (!llvm::any_of(T->operands(), isOldLoopArgument)) 966 return &N; 967 968 SmallVector<Metadata *, 8> Ops; 969 Ops.reserve(T->getNumOperands()); 970 for (Metadata *MD : T->operands()) 971 Ops.push_back(upgradeLoopArgument(MD)); 972 973 return MDTuple::get(T->getContext(), Ops); 974 } 975