1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 // Upgrade the declarations of the SSE4.1 functions whose arguments have 35 // changed their type from v4f32 to v2i64. 36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 37 Function *&NewFn) { 38 // Check whether this is an old version of the function, which received 39 // v4f32 arguments. 40 Type *Arg0Type = F->getFunctionType()->getParamType(0); 41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 42 return false; 43 44 // Yes, it's old, replace it with new version. 45 F->setName(F->getName() + ".old"); 46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 47 return true; 48 } 49 50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 51 // arguments have changed their type from i32 to i8. 
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32. The current declarations take an
  // i8 immediate, so an i32 here marks the stale prototype.
  Type *LastArgType = F->getFunctionType()->getParamType(
     F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Returns true if F is an intrinsic declaration that needs upgrading.
// If the upgrade is a declaration-level rename/retype, NewFn receives the
// replacement declaration; if the call sites must be rewritten into plain IR
// instead, NewFn is left null (see UpgradeIntrinsicCall).
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate. (No upgradable intrinsic
  // name is shorter than "llvm.xxxx".)
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  // Dispatch on the first character after "llvm." to avoid running every
  // string comparison on every function.
  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    // The vldN/vldNlane intrinsics gained an explicit pointer-type suffix
    // (".p0i8") in their mangled names.
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                          Intrinsic::arm_neon_vst2,
                                          Intrinsic::arm_neon_vst3,
                                          Intrinsic::arm_neon_vst4};

      static Intrinsic::ID StoreLaneInts[] = {Intrinsic::arm_neon_vst2lane,
                                              Intrinsic::arm_neon_vst3lane,
                                              Intrinsic::arm_neon_vst4lane};

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      // vstN takes (ptr, N vectors, align), so arg count - 3 indexes into
      // StoreInts; vstNlane takes (ptr, N vectors, lane, align), so
      // arg count - 5 indexes into StoreLaneInts.
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    break;
  }

  case 'c': {
    // Old ctlz/cttz took a single operand; the current forms take an extra
    // i1 "is_zero_undef" flag, supplied at the call site by
    // UpgradeIntrinsicCall.
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        F->setName(Name + ".old");
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'x': {
    // All of the intrinsics in this list are expanded into plain IR at the
    // call site; NewFn stays null to signal that to the caller.
    if (Name.startswith("x86.sse2.pcmpeq.") ||
        Name.startswith("x86.sse2.pcmpgt.") ||
        Name.startswith("x86.avx2.pcmpeq.") ||
        Name.startswith("x86.avx2.pcmpgt.") ||
        Name.startswith("x86.avx2.vbroadcast") ||
        Name.startswith("x86.avx2.pbroadcast") ||
        Name.startswith("x86.avx.vpermil.") ||
        Name.startswith("x86.sse41.pmovsx") ||
        Name == "x86.avx.vinsertf128.pd.256" ||
        Name == "x86.avx.vinsertf128.ps.256" ||
        Name == "x86.avx.vinsertf128.si.256" ||
        Name == "x86.avx2.vinserti128" ||
        Name == "x86.avx.vextractf128.pd.256" ||
        Name == "x86.avx.vextractf128.ps.256" ||
        Name == "x86.avx.vextractf128.si.256" ||
        Name == "x86.avx2.vextracti128" ||
        Name == "x86.avx.movnt.dq.256" ||
        Name == "x86.avx.movnt.pd.256" ||
        Name == "x86.avx.movnt.ps.256" ||
        Name == "x86.sse42.crc32.64.8" ||
        Name == "x86.avx.vbroadcast.ss" ||
        Name == "x86.avx.vbroadcast.ss.256" ||
        Name == "x86.avx.vbroadcast.sd.256" ||
        Name == "x86.sse2.psll.dq" ||
        Name == "x86.sse2.psrl.dq" ||
        Name == "x86.avx2.psll.dq" ||
        Name == "x86.avx2.psrl.dq" ||
        Name == "x86.sse2.psll.dq.bs" ||
        Name == "x86.sse2.psrl.dq.bs" ||
        Name == "x86.avx2.psll.dq.bs" ||
        Name == "x86.avx2.psrl.dq.bs" ||
        Name == "x86.sse41.pblendw" ||
        Name == "x86.sse41.blendpd" ||
        Name == "x86.sse41.blendps" ||
        Name == "x86.avx.blend.pd.256" ||
        Name == "x86.avx.blend.ps.256" ||
        Name == "x86.avx2.pblendw" ||
        Name == "x86.avx2.pblendd.128" ||
        Name == "x86.avx2.pblendd.256" ||
        Name == "x86.avx2.vbroadcasti128" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
    }
    // SSE4.1 ptest functions may have an old signature.
    if (Name.startswith("x86.sse41.ptest")) {
      if (Name == "x86.sse41.ptestc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name == "x86.sse41.ptestz")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name == "x86.sse41.ptestnzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (Name == "x86.sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (Name == "x86.sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (Name == "x86.sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (Name == "x86.sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (Name == "x86.avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (Name == "x86.avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped
    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4
    if (Name.startswith("x86.fma4.")) {
      // Pure rename; the renamed F itself becomes the "new" function.
      F->setName("llvm.x86.fma" + Name.substr(8));
      NewFn = F;
      return true;
    }
    break;
  }
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (Res = zeros, Op = data): indices < NumElts
        // select zero bytes, indices >= NumElts select data bytes. The first
        // Shift positions of each lane come from the zero vector.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}

// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (Op = data, Res = zeros): indices < NumElts
        // select data bytes, indices >= NumElts select zero bytes. The last
        // Shift positions of each lane come from the zero vector.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}

// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);

  assert(F && "Intrinsic call is not direct?");

  // NewFn == null means the intrinsic is expanded into plain IR here rather
  // than remapped to a replacement declaration (see UpgradeIntrinsicFunction1).
  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    // Replacement value that takes over all uses of CI.
    Value *Rep;
    // Upgrade packed integer vector compares intrinsics to compare instructions
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
               Name == "llvm.x86.avx.movnt.ps.256" ||
               Name == "llvm.x86.avx.movnt.pd.256") {
      // Non-temporal stores become an ordinary store tagged with
      // !nontemporal metadata. (This local builder shadows the outer one but
      // uses the same insert point.)
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateStore(Arg1, BC);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);
      SI->setAlignment(32);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      // Map the condition-coded vpcom names onto the generic immediate form.
      // The unsigned suffixes ("ub" etc.) must be tested before the signed
      // single-letter ones so that e.g. "ub" is not matched as "b".
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
      // The condition name encodes the hardware immediate.
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      // The 64.8 variant only differs in register width; reuse the 32.8
      // intrinsic and widen the result back.
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                  Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      // Load the scalar from memory, then splat it into every lane.
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign extend.
      SmallVector<int, 8> ShuffleMask;
      for (int i = 0; i != (int)NumDstElts; ++i)
        ShuffleMask.push_back(i);

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
      Rep = Builder.CreateSExt(SV, DstTy);
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle: load the 128-bit source
      // as <2 x i64> and duplicate it into both halves of the result.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      const int Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                        Idxs);
    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
               Name.startswith("llvm.x86.avx2.vbroadcast")) {
      // Replace vp?broadcasts with a vector shuffle. (Checked after the
      // vbroadcasti128 special case above, which would also match here.)
      // A zero mask splats element 0 of the source into every lane.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));
    } else if (Name == "llvm.x86.sse2.psll.dq") {
      // 128-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq") {
      // 128-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psll.dq") {
      // 256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psrl.dq") {
      // 256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
      // 128-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
      // 128-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
      // 256-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
      // 256-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name == "llvm.x86.sse41.blendpd" ||
               Name == "llvm.x86.sse41.blendps" ||
               Name == "llvm.x86.avx.blend.pd.256" ||
               Name == "llvm.x86.avx.blend.ps.256" ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name == "llvm.x86.avx2.pblendd.128" ||
               Name == "llvm.x86.avx2.pblendd.256") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask bit set -> take the element from Op1 (index i + NumElts in the
      // shuffle), otherwise from Op0 (index i). The 8-bit immediate repeats
      // per 8 elements (i % 8) for the wider blends.
      SmallVector<Constant*, 16> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
        Idxs.push_back(Builder.getInt32(Idx));
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
    } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
               Name == "llvm.x86.avx.vinsertf128.ps.256" ||
               Name == "llvm.x86.avx.vinsertf128.si.256" ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant*, 8> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs.push_back(Builder.getInt32(i));
      }
      Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      SmallVector<Constant*, 8> Idxs2;
      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i) {
        unsigned Idx = Imm ? i : (i + NumElts);
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts / 2) : i;
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
    } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
               Name == "llvm.x86.avx.vextractf128.ps.256" ||
               Name == "llvm.x86.avx.vextractf128.si.256" ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
      // (The input has 2*NumElts elements; Imm selects which half.)
      SmallVector<Constant*, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts) : i;
        Idxs[i] = Builder.getInt32(Idx);
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    } else {
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
        PD256 = true;
      else if (Name == "llvm.x86.avx.vpermil.pd")
        PD128 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps.256")
        PS256 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps")
        PS128 = true;

      if (PD256 || PD128 || PS256 || PS128) {
        // vpermil with a constant immediate becomes a single-operand shuffle.
        // For the 256-bit forms the shuffle is built per 128-bit lane (+ l
        // keeps indices inside the lane, matching hardware behavior).
        Value *Op0 = CI->getArgOperand(0);
        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
        SmallVector<Constant*, 8> Idxs;

        if (PD128)
          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
        else if (PD256)
          for (unsigned l = 0; l != 4; l+=2)
            for (unsigned i = 0; i != 2; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
        else if (PS128)
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
        else if (PS256)
          // The 8-bit immediate is reused identically for both lanes.
          for (unsigned l = 0; l != 8; l+=4)
            for (unsigned i = 0; i != 4; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
        else
          llvm_unreachable("Unexpected function");

        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      } else {
        llvm_unreachable("Unknown function for CallInst upgrade.");
      }
    }

    CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  // NewFn path: rebuild the call against the replacement declaration,
  // preserving the original result name.
  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    // Same operands, just a re-mangled callee.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    // Append is_zero_undef = false to preserve the old defined-at-zero
    // semantics.
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form took two operands; the new form takes only the second one.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  }
}

// This tests each Function to determine if it needs upgrading. When we find
// one we are interested in, we then upgrade all calls to reflect the new
// function.
void llvm::UpgradeCallsToIntrinsic(Function* F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Upgrade the function and check if it is a totaly new function.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all uses to the old function with the new one if necessary.
    // (Advance the iterator before rewriting, since UpgradeIntrinsicCall may
    // erase the use we are looking at.)
    for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
         UI != UE;) {
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);
    }
    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

// Convert a legacy scalar TBAA tag (<name, parent, [const]>) into the
// struct-path aware format (<base, access, offset, [const]>).
void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format: its first operand is
  // another node (a type descriptor) rather than a string.
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
    return;

  if (MD->getNumOperands() == 3) {
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
                         MD->getOperand(2)};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
  } else {
    // Create a MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
  }
}

// If Opc is a cross-address-space bitcast (no longer legal IR), rewrite it as
// a ptrtoint/inttoptr pair. Temp receives the intermediate instruction (null
// if none was needed); returns the replacement instruction or null if no
// upgrade applies.
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

// Constant-expression analogue of UpgradeBitCastInst: rewrite a
// cross-address-space bitcast constant expression as ptrtoint/inttoptr.
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION)
    return false;

  bool RetCode = StripDebugInfo(M);
  if (RetCode) {
    // Report (via the context's diagnostic handler) that stale debug info
    // was dropped.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return RetCode;
}

// Rename legacy "llvm.vectorizer.*" loop-metadata strings to their current
// "llvm.loop.*" spellings, in place.
void llvm::UpgradeMDStringConstant(std::string &String) {
  const std::string OldPrefix = "llvm.vectorizer.";
  if (String == "llvm.vectorizer.unroll") {
    // Special case: "unroll" was renamed to "interleave.count".
    String = "llvm.loop.interleave.count";
  } else if (String.find(OldPrefix) == 0) {
    String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
  }
}