1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 // Upgrade the declarations of the SSE4.1 functions whose arguments have 35 // changed their type from v4f32 to v2i64. 36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 37 Function *&NewFn) { 38 // Check whether this is an old version of the function, which received 39 // v4f32 arguments. 40 Type *Arg0Type = F->getFunctionType()->getParamType(0); 41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 42 return false; 43 44 // Yes, it's old, replace it with new version. 45 F->setName(F->getName() + ".old"); 46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 47 return true; 48 } 49 50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 51 // arguments have changed their type from i32 to i8. 52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 53 Function *&NewFn) { 54 // Check that the last argument is an i32. 55 Type *LastArgType = F->getFunctionType()->getParamType( 56 F->getFunctionType()->getNumParams() - 1); 57 if (!LastArgType->isIntegerTy(32)) 58 return false; 59 60 // Move this function aside and map down. 61 F->setName(F->getName() + ".old"); 62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 63 return true; 64 } 65 66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 67 assert(F && "Illegal to upgrade a non-existent Function."); 68 69 // Quickly eliminate it, if it's not a candidate. 70 StringRef Name = F->getName(); 71 if (Name.size() <= 8 || !Name.startswith("llvm.")) 72 return false; 73 Name = Name.substr(5); // Strip off "llvm." 74 75 switch (Name[0]) { 76 default: break; 77 case 'a': { 78 if (Name.startswith("arm.neon.vclz")) { 79 Type* args[2] = { 80 F->arg_begin()->getType(), 81 Type::getInt1Ty(F->getContext()) 82 }; 83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 84 // the end of the name. Change name from llvm.arm.neon.vclz.* to 85 // llvm.ctlz.* 86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 87 NewFn = Function::Create(fType, F->getLinkage(), 88 "llvm.ctlz." + Name.substr(14), F->getParent()); 89 return true; 90 } 91 if (Name.startswith("arm.neon.vcnt")) { 92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 93 F->arg_begin()->getType()); 94 return true; 95 } 96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 97 if (vldRegex.match(Name)) { 98 auto fArgs = F->getFunctionType()->params(); 99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 100 // Can't use Intrinsic::getDeclaration here as the return types might 101 // then only be structurally equal. 102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 103 NewFn = Function::Create(fType, F->getLinkage(), 104 "llvm." + Name + ".p0i8", F->getParent()); 105 return true; 106 } 107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 108 if (vstRegex.match(Name)) { 109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 110 Intrinsic::arm_neon_vst2, 111 Intrinsic::arm_neon_vst3, 112 Intrinsic::arm_neon_vst4}; 113 114 static const Intrinsic::ID StoreLaneInts[] = { 115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 116 Intrinsic::arm_neon_vst4lane 117 }; 118 119 auto fArgs = F->getFunctionType()->params(); 120 Type *Tys[] = {fArgs[0], fArgs[1]}; 121 if (Name.find("lane") == StringRef::npos) 122 NewFn = Intrinsic::getDeclaration(F->getParent(), 123 StoreInts[fArgs.size() - 3], Tys); 124 else 125 NewFn = Intrinsic::getDeclaration(F->getParent(), 126 StoreLaneInts[fArgs.size() - 5], Tys); 127 return true; 128 } 129 break; 130 } 131 132 case 'c': { 133 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 134 F->setName(Name + ".old"); 135 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 136 F->arg_begin()->getType()); 137 return true; 138 } 139 if (Name.startswith("cttz.") && F->arg_size() == 1) { 140 F->setName(Name + ".old"); 141 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 142 F->arg_begin()->getType()); 143 return true; 144 } 145 break; 146 } 147 148 case 'm': { 149 if (Name.startswith("masked.load.")) { 150 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; 151 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) { 152 F->setName(Name + ".old"); 153 NewFn = Intrinsic::getDeclaration(F->getParent(), 154 Intrinsic::masked_load, 155 Tys); 156 return true; 157 } 158 } 159 if (Name.startswith("masked.store.")) { 160 auto Args = F->getFunctionType()->params(); 161 Type *Tys[] = { Args[0], Args[1] }; 162 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) { 163 F->setName(Name + ".old"); 164 NewFn = Intrinsic::getDeclaration(F->getParent(), 165 Intrinsic::masked_store, 166 Tys); 167 return true; 168 } 169 } 170 break; 171 } 172 173 case 'o': 174 // We only need to change the name to match the mangling including the 175 // address space. 176 if (F->arg_size() == 2 && Name.startswith("objectsize.")) { 177 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 178 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 179 F->setName(Name + ".old"); 180 NewFn = Intrinsic::getDeclaration(F->getParent(), 181 Intrinsic::objectsize, Tys); 182 return true; 183 } 184 } 185 break; 186 187 case 'x': { 188 if (Name.startswith("x86.sse2.pcmpeq.") || 189 Name.startswith("x86.sse2.pcmpgt.") || 190 Name.startswith("x86.avx2.pcmpeq.") || 191 Name.startswith("x86.avx2.pcmpgt.") || 192 Name.startswith("x86.avx2.vbroadcast") || 193 Name.startswith("x86.avx2.pbroadcast") || 194 Name.startswith("x86.avx.vpermil.") || 195 Name.startswith("x86.sse41.pmovsx") || 196 Name == "x86.avx.vinsertf128.pd.256" || 197 Name == "x86.avx.vinsertf128.ps.256" || 198 Name == "x86.avx.vinsertf128.si.256" || 199 Name == "x86.avx2.vinserti128" || 200 Name == "x86.avx.vextractf128.pd.256" || 201 Name == "x86.avx.vextractf128.ps.256" || 202 Name == "x86.avx.vextractf128.si.256" || 203 Name == "x86.avx2.vextracti128" || 204 Name == "x86.avx.movnt.dq.256" || 205 Name == "x86.avx.movnt.pd.256" || 206 Name == "x86.avx.movnt.ps.256" || 207 Name == "x86.sse42.crc32.64.8" || 208 Name == "x86.avx.vbroadcast.ss" || 209 Name == "x86.avx.vbroadcast.ss.256" || 210 Name == "x86.avx.vbroadcast.sd.256" || 211 Name == "x86.sse2.psll.dq" || 212 Name == "x86.sse2.psrl.dq" || 213 Name == "x86.avx2.psll.dq" || 214 Name == "x86.avx2.psrl.dq" || 215 Name == "x86.sse2.psll.dq.bs" || 216 Name == "x86.sse2.psrl.dq.bs" || 217 Name == "x86.avx2.psll.dq.bs" || 218 Name == "x86.avx2.psrl.dq.bs" || 219 Name == "x86.sse41.pblendw" || 220 Name == "x86.sse41.blendpd" || 221 Name == "x86.sse41.blendps" || 222 Name == "x86.avx.blend.pd.256" || 223 Name == "x86.avx.blend.ps.256" || 224 Name == "x86.avx2.pblendw" || 225 Name == "x86.avx2.pblendd.128" || 226 Name == "x86.avx2.pblendd.256" || 227 Name == "x86.avx2.vbroadcasti128" || 228 Name == "x86.xop.vpcmov" || 229 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { 230 NewFn = nullptr; 231 return true; 232 } 233 // SSE4.1 ptest functions may have an old signature. 234 if (Name.startswith("x86.sse41.ptest")) { 235 if (Name == "x86.sse41.ptestc") 236 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); 237 if (Name == "x86.sse41.ptestz") 238 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); 239 if (Name == "x86.sse41.ptestnzc") 240 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 241 } 242 // Several blend and other instructions with masks used the wrong number of 243 // bits. 244 if (Name == "x86.sse41.insertps") 245 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 246 NewFn); 247 if (Name == "x86.sse41.dppd") 248 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 249 NewFn); 250 if (Name == "x86.sse41.dpps") 251 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 252 NewFn); 253 if (Name == "x86.sse41.mpsadbw") 254 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 255 NewFn); 256 if (Name == "x86.avx.dp.ps.256") 257 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 258 NewFn); 259 if (Name == "x86.avx2.mpsadbw") 260 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 261 NewFn); 262 263 // frcz.ss/sd may need to have an argument dropped 264 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { 265 F->setName(Name + ".old"); 266 NewFn = Intrinsic::getDeclaration(F->getParent(), 267 Intrinsic::x86_xop_vfrcz_ss); 268 return true; 269 } 270 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { 271 F->setName(Name + ".old"); 272 NewFn = Intrinsic::getDeclaration(F->getParent(), 273 Intrinsic::x86_xop_vfrcz_sd); 274 return true; 275 } 276 // Fix the FMA4 intrinsics to remove the 4 277 if (Name.startswith("x86.fma4.")) { 278 F->setName("llvm.x86.fma" + Name.substr(8)); 279 NewFn = F; 280 return true; 281 } 282 break; 283 } 284 } 285 286 // This may not belong here. This function is effectively being overloaded 287 // to both detect an intrinsic which needs upgrading, and to provide the 288 // upgraded form of the intrinsic. We should perhaps have two separate 289 // functions for this. 290 return false; 291 } 292 293 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 294 NewFn = nullptr; 295 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 296 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 297 298 // Upgrade intrinsic attributes. This does not change the function. 299 if (NewFn) 300 F = NewFn; 301 if (Intrinsic::ID id = F->getIntrinsicID()) 302 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 303 return Upgraded; 304 } 305 306 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 307 // Nothing to do yet. 308 return false; 309 } 310 311 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them 312 // to byte shuffles. 313 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 314 Value *Op, unsigned NumLanes, 315 unsigned Shift) { 316 // Each lane is 16 bytes. 317 unsigned NumElts = NumLanes * 16; 318 319 // Bitcast from a 64-bit element type to a byte element type. 320 Op = Builder.CreateBitCast(Op, 321 VectorType::get(Type::getInt8Ty(C), NumElts), 322 "cast"); 323 // We'll be shuffling in zeroes. 324 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 325 326 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 327 // we'll just return the zero vector. 328 if (Shift < 16) { 329 SmallVector<Constant*, 32> Idxs; 330 // 256-bit version is split into two 16-byte lanes. 331 for (unsigned l = 0; l != NumElts; l += 16) 332 for (unsigned i = 0; i != 16; ++i) { 333 unsigned Idx = NumElts + i - Shift; 334 if (Idx < NumElts) 335 Idx -= NumElts - 16; // end of lane, switch operand. 336 Idxs.push_back(Builder.getInt32(Idx + l)); 337 } 338 339 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs)); 340 } 341 342 // Bitcast back to a 64-bit element type. 343 return Builder.CreateBitCast(Res, 344 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 345 "cast"); 346 } 347 348 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them 349 // to byte shuffles. 350 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 351 Value *Op, unsigned NumLanes, 352 unsigned Shift) { 353 // Each lane is 16 bytes. 354 unsigned NumElts = NumLanes * 16; 355 356 // Bitcast from a 64-bit element type to a byte element type. 357 Op = Builder.CreateBitCast(Op, 358 VectorType::get(Type::getInt8Ty(C), NumElts), 359 "cast"); 360 // We'll be shuffling in zeroes. 361 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 362 363 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 364 // we'll just return the zero vector. 365 if (Shift < 16) { 366 SmallVector<Constant*, 32> Idxs; 367 // 256-bit version is split into two 16-byte lanes. 368 for (unsigned l = 0; l != NumElts; l += 16) 369 for (unsigned i = 0; i != 16; ++i) { 370 unsigned Idx = i + Shift; 371 if (Idx >= 16) 372 Idx += NumElts - 16; // end of lane, switch operand. 373 Idxs.push_back(Builder.getInt32(Idx + l)); 374 } 375 376 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs)); 377 } 378 379 // Bitcast back to a 64-bit element type. 380 return Builder.CreateBitCast(Res, 381 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 382 "cast"); 383 } 384 385 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 386 // upgraded intrinsic. All argument and return casting must be provided in 387 // order to seamlessly integrate with existing context. 388 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 389 Function *F = CI->getCalledFunction(); 390 LLVMContext &C = CI->getContext(); 391 IRBuilder<> Builder(C); 392 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 393 394 assert(F && "Intrinsic call is not direct?"); 395 396 if (!NewFn) { 397 // Get the Function's name. 398 StringRef Name = F->getName(); 399 400 Value *Rep; 401 // Upgrade packed integer vector compares intrinsics to compare instructions 402 if (Name.startswith("llvm.x86.sse2.pcmpeq.") || 403 Name.startswith("llvm.x86.avx2.pcmpeq.")) { 404 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1), 405 "pcmpeq"); 406 // need to sign extend since icmp returns vector of i1 407 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 408 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") || 409 Name.startswith("llvm.x86.avx2.pcmpgt.")) { 410 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), 411 "pcmpgt"); 412 // need to sign extend since icmp returns vector of i1 413 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 414 } else if (Name == "llvm.x86.avx.movnt.dq.256" || 415 Name == "llvm.x86.avx.movnt.ps.256" || 416 Name == "llvm.x86.avx.movnt.pd.256") { 417 IRBuilder<> Builder(C); 418 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 419 420 Module *M = F->getParent(); 421 SmallVector<Metadata *, 1> Elts; 422 Elts.push_back( 423 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 424 MDNode *Node = MDNode::get(C, Elts); 425 426 Value *Arg0 = CI->getArgOperand(0); 427 Value *Arg1 = CI->getArgOperand(1); 428 429 // Convert the type of the pointer to a pointer to the stored type. 430 Value *BC = Builder.CreateBitCast(Arg0, 431 PointerType::getUnqual(Arg1->getType()), 432 "cast"); 433 StoreInst *SI = Builder.CreateStore(Arg1, BC); 434 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 435 SI->setAlignment(32); 436 437 // Remove intrinsic. 438 CI->eraseFromParent(); 439 return; 440 } else if (Name.startswith("llvm.x86.xop.vpcom")) { 441 Intrinsic::ID intID; 442 if (Name.endswith("ub")) 443 intID = Intrinsic::x86_xop_vpcomub; 444 else if (Name.endswith("uw")) 445 intID = Intrinsic::x86_xop_vpcomuw; 446 else if (Name.endswith("ud")) 447 intID = Intrinsic::x86_xop_vpcomud; 448 else if (Name.endswith("uq")) 449 intID = Intrinsic::x86_xop_vpcomuq; 450 else if (Name.endswith("b")) 451 intID = Intrinsic::x86_xop_vpcomb; 452 else if (Name.endswith("w")) 453 intID = Intrinsic::x86_xop_vpcomw; 454 else if (Name.endswith("d")) 455 intID = Intrinsic::x86_xop_vpcomd; 456 else if (Name.endswith("q")) 457 intID = Intrinsic::x86_xop_vpcomq; 458 else 459 llvm_unreachable("Unknown suffix"); 460 461 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" 462 unsigned Imm; 463 if (Name.startswith("lt")) 464 Imm = 0; 465 else if (Name.startswith("le")) 466 Imm = 1; 467 else if (Name.startswith("gt")) 468 Imm = 2; 469 else if (Name.startswith("ge")) 470 Imm = 3; 471 else if (Name.startswith("eq")) 472 Imm = 4; 473 else if (Name.startswith("ne")) 474 Imm = 5; 475 else if (Name.startswith("false")) 476 Imm = 6; 477 else if (Name.startswith("true")) 478 Imm = 7; 479 else 480 llvm_unreachable("Unknown condition"); 481 482 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 483 Rep = 484 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 485 Builder.getInt8(Imm)}); 486 } else if (Name == "llvm.x86.xop.vpcmov") { 487 Value *Arg0 = CI->getArgOperand(0); 488 Value *Arg1 = CI->getArgOperand(1); 489 Value *Sel = CI->getArgOperand(2); 490 unsigned NumElts = CI->getType()->getVectorNumElements(); 491 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1)); 492 Value *NotSel = Builder.CreateXor(Sel, MinusOne); 493 Value *Sel0 = Builder.CreateAnd(Arg0, Sel); 494 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel); 495 Rep = Builder.CreateOr(Sel0, Sel1); 496 } else if (Name == "llvm.x86.sse42.crc32.64.8") { 497 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 498 Intrinsic::x86_sse42_crc32_32_8); 499 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 500 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 501 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 502 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { 503 // Replace broadcasts with a series of insertelements. 504 Type *VecTy = CI->getType(); 505 Type *EltTy = VecTy->getVectorElementType(); 506 unsigned EltNum = VecTy->getVectorNumElements(); 507 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 508 EltTy->getPointerTo()); 509 Value *Load = Builder.CreateLoad(EltTy, Cast); 510 Type *I32Ty = Type::getInt32Ty(C); 511 Rep = UndefValue::get(VecTy); 512 for (unsigned I = 0; I < EltNum; ++I) 513 Rep = Builder.CreateInsertElement(Rep, Load, 514 ConstantInt::get(I32Ty, I)); 515 } else if (Name.startswith("llvm.x86.sse41.pmovsx")) { 516 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 517 VectorType *DstTy = cast<VectorType>(CI->getType()); 518 unsigned NumDstElts = DstTy->getNumElements(); 519 520 // Extract a subvector of the first NumDstElts lanes and sign extend. 521 SmallVector<int, 8> ShuffleMask; 522 for (int i = 0; i != (int)NumDstElts; ++i) 523 ShuffleMask.push_back(i); 524 525 Value *SV = Builder.CreateShuffleVector( 526 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 527 Rep = Builder.CreateSExt(SV, DstTy); 528 } else if (Name == "llvm.x86.avx2.vbroadcasti128") { 529 // Replace vbroadcasts with a vector shuffle. 530 Type *VT = VectorType::get(Type::getInt64Ty(C), 2); 531 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 532 PointerType::getUnqual(VT)); 533 Value *Load = Builder.CreateLoad(VT, Op); 534 const int Idxs[4] = { 0, 1, 0, 1 }; 535 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 536 Idxs); 537 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") || 538 Name.startswith("llvm.x86.avx2.vbroadcast")) { 539 // Replace vp?broadcasts with a vector shuffle. 540 Value *Op = CI->getArgOperand(0); 541 unsigned NumElts = CI->getType()->getVectorNumElements(); 542 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 543 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 544 Constant::getNullValue(MaskTy)); 545 } else if (Name == "llvm.x86.sse2.psll.dq") { 546 // 128-bit shift left specified in bits. 547 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 548 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 549 Shift / 8); // Shift is in bits. 550 } else if (Name == "llvm.x86.sse2.psrl.dq") { 551 // 128-bit shift right specified in bits. 552 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 553 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 554 Shift / 8); // Shift is in bits. 555 } else if (Name == "llvm.x86.avx2.psll.dq") { 556 // 256-bit shift left specified in bits. 557 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 558 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 559 Shift / 8); // Shift is in bits. 560 } else if (Name == "llvm.x86.avx2.psrl.dq") { 561 // 256-bit shift right specified in bits. 562 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 563 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 564 Shift / 8); // Shift is in bits. 565 } else if (Name == "llvm.x86.sse2.psll.dq.bs") { 566 // 128-bit shift left specified in bytes. 567 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 568 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 569 Shift); 570 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") { 571 // 128-bit shift right specified in bytes. 572 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 573 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 574 Shift); 575 } else if (Name == "llvm.x86.avx2.psll.dq.bs") { 576 // 256-bit shift left specified in bytes. 577 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 578 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 579 Shift); 580 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") { 581 // 256-bit shift right specified in bytes. 582 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 583 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 584 Shift); 585 } else if (Name == "llvm.x86.sse41.pblendw" || 586 Name == "llvm.x86.sse41.blendpd" || 587 Name == "llvm.x86.sse41.blendps" || 588 Name == "llvm.x86.avx.blend.pd.256" || 589 Name == "llvm.x86.avx.blend.ps.256" || 590 Name == "llvm.x86.avx2.pblendw" || 591 Name == "llvm.x86.avx2.pblendd.128" || 592 Name == "llvm.x86.avx2.pblendd.256") { 593 Value *Op0 = CI->getArgOperand(0); 594 Value *Op1 = CI->getArgOperand(1); 595 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 596 VectorType *VecTy = cast<VectorType>(CI->getType()); 597 unsigned NumElts = VecTy->getNumElements(); 598 599 SmallVector<Constant*, 16> Idxs; 600 for (unsigned i = 0; i != NumElts; ++i) { 601 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 602 Idxs.push_back(Builder.getInt32(Idx)); 603 } 604 605 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); 606 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" || 607 Name == "llvm.x86.avx.vinsertf128.ps.256" || 608 Name == "llvm.x86.avx.vinsertf128.si.256" || 609 Name == "llvm.x86.avx2.vinserti128") { 610 Value *Op0 = CI->getArgOperand(0); 611 Value *Op1 = CI->getArgOperand(1); 612 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 613 VectorType *VecTy = cast<VectorType>(CI->getType()); 614 unsigned NumElts = VecTy->getNumElements(); 615 616 // Mask off the high bits of the immediate value; hardware ignores those. 617 Imm = Imm & 1; 618 619 // Extend the second operand into a vector that is twice as big. 620 Value *UndefV = UndefValue::get(Op1->getType()); 621 SmallVector<Constant*, 8> Idxs; 622 for (unsigned i = 0; i != NumElts; ++i) { 623 Idxs.push_back(Builder.getInt32(i)); 624 } 625 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs)); 626 627 // Insert the second operand into the first operand. 628 629 // Note that there is no guarantee that instruction lowering will actually 630 // produce a vinsertf128 instruction for the created shuffles. In 631 // particular, the 0 immediate case involves no lane changes, so it can 632 // be handled as a blend. 633 634 // Example of shuffle mask for 32-bit elements: 635 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 636 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 637 638 SmallVector<Constant*, 8> Idxs2; 639 // The low half of the result is either the low half of the 1st operand 640 // or the low half of the 2nd operand (the inserted vector). 641 for (unsigned i = 0; i != NumElts / 2; ++i) { 642 unsigned Idx = Imm ? i : (i + NumElts); 643 Idxs2.push_back(Builder.getInt32(Idx)); 644 } 645 // The high half of the result is either the low half of the 2nd operand 646 // (the inserted vector) or the high half of the 1st operand. 647 for (unsigned i = NumElts / 2; i != NumElts; ++i) { 648 unsigned Idx = Imm ? (i + NumElts / 2) : i; 649 Idxs2.push_back(Builder.getInt32(Idx)); 650 } 651 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2)); 652 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" || 653 Name == "llvm.x86.avx.vextractf128.ps.256" || 654 Name == "llvm.x86.avx.vextractf128.si.256" || 655 Name == "llvm.x86.avx2.vextracti128") { 656 Value *Op0 = CI->getArgOperand(0); 657 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 658 VectorType *VecTy = cast<VectorType>(CI->getType()); 659 unsigned NumElts = VecTy->getNumElements(); 660 661 // Mask off the high bits of the immediate value; hardware ignores those. 662 Imm = Imm & 1; 663 664 // Get indexes for either the high half or low half of the input vector. 665 SmallVector<Constant*, 4> Idxs(NumElts); 666 for (unsigned i = 0; i != NumElts; ++i) { 667 unsigned Idx = Imm ? (i + NumElts) : i; 668 Idxs[i] = Builder.getInt32(Idx); 669 } 670 671 Value *UndefV = UndefValue::get(Op0->getType()); 672 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs)); 673 } else { 674 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; 675 if (Name == "llvm.x86.avx.vpermil.pd.256") 676 PD256 = true; 677 else if (Name == "llvm.x86.avx.vpermil.pd") 678 PD128 = true; 679 else if (Name == "llvm.x86.avx.vpermil.ps.256") 680 PS256 = true; 681 else if (Name == "llvm.x86.avx.vpermil.ps") 682 PS128 = true; 683 684 if (PD256 || PD128 || PS256 || PS128) { 685 Value *Op0 = CI->getArgOperand(0); 686 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 687 SmallVector<Constant*, 8> Idxs; 688 689 if (PD128) 690 for (unsigned i = 0; i != 2; ++i) 691 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1)); 692 else if (PD256) 693 for (unsigned l = 0; l != 4; l+=2) 694 for (unsigned i = 0; i != 2; ++i) 695 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l)); 696 else if (PS128) 697 for (unsigned i = 0; i != 4; ++i) 698 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3)); 699 else if (PS256) 700 for (unsigned l = 0; l != 8; l+=4) 701 for (unsigned i = 0; i != 4; ++i) 702 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l)); 703 else 704 llvm_unreachable("Unexpected function"); 705 706 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs)); 707 } else { 708 llvm_unreachable("Unknown function for CallInst upgrade."); 709 } 710 } 711 712 CI->replaceAllUsesWith(Rep); 713 CI->eraseFromParent(); 714 return; 715 } 716 717 std::string Name = CI->getName(); 718 if (!Name.empty()) 719 CI->setName(Name + ".old"); 720 721 switch (NewFn->getIntrinsicID()) { 722 default: 723 llvm_unreachable("Unknown function for CallInst upgrade."); 724 725 case Intrinsic::arm_neon_vld1: 726 case Intrinsic::arm_neon_vld2: 727 case Intrinsic::arm_neon_vld3: 728 case Intrinsic::arm_neon_vld4: 729 case Intrinsic::arm_neon_vld2lane: 730 case Intrinsic::arm_neon_vld3lane: 731 case Intrinsic::arm_neon_vld4lane: 732 case Intrinsic::arm_neon_vst1: 733 case Intrinsic::arm_neon_vst2: 734 case Intrinsic::arm_neon_vst3: 735 case Intrinsic::arm_neon_vst4: 736 case Intrinsic::arm_neon_vst2lane: 737 case Intrinsic::arm_neon_vst3lane: 738 case Intrinsic::arm_neon_vst4lane: { 739 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 740 CI->arg_operands().end()); 741 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 742 CI->eraseFromParent(); 743 return; 744 } 745 746 case Intrinsic::ctlz: 747 case Intrinsic::cttz: 748 assert(CI->getNumArgOperands() == 1 && 749 "Mismatch between function args and call args"); 750 CI->replaceAllUsesWith(Builder.CreateCall( 751 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name)); 752 CI->eraseFromParent(); 753 return; 754 755 case Intrinsic::objectsize: 756 CI->replaceAllUsesWith(Builder.CreateCall( 757 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); 758 CI->eraseFromParent(); 759 return; 760 761 case Intrinsic::ctpop: { 762 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); 763 CI->eraseFromParent(); 764 return; 765 } 766 767 case Intrinsic::x86_xop_vfrcz_ss: 768 case Intrinsic::x86_xop_vfrcz_sd: 769 CI->replaceAllUsesWith( 770 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name)); 771 CI->eraseFromParent(); 772 return; 773 774 case Intrinsic::x86_sse41_ptestc: 775 case Intrinsic::x86_sse41_ptestz: 776 case Intrinsic::x86_sse41_ptestnzc: { 777 // The arguments for these intrinsics used to be v4f32, and changed 778 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 779 // So, the only thing required is a bitcast for both arguments. 780 // First, check the arguments have the old type. 781 Value *Arg0 = CI->getArgOperand(0); 782 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 783 return; 784 785 // Old intrinsic, add bitcasts 786 Value *Arg1 = CI->getArgOperand(1); 787 788 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 789 790 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 791 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 792 793 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name); 794 CI->replaceAllUsesWith(NewCall); 795 CI->eraseFromParent(); 796 return; 797 } 798 799 case Intrinsic::x86_sse41_insertps: 800 case Intrinsic::x86_sse41_dppd: 801 case Intrinsic::x86_sse41_dpps: 802 case Intrinsic::x86_sse41_mpsadbw: 803 case Intrinsic::x86_avx_dp_ps_256: 804 case Intrinsic::x86_avx2_mpsadbw: { 805 // Need to truncate the last argument from i32 to i8 -- this argument models 806 // an inherently 8-bit immediate operand to these x86 instructions. 807 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 808 CI->arg_operands().end()); 809 810 // Replace the last argument with a trunc. 811 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 812 813 CallInst *NewCall = Builder.CreateCall(NewFn, Args); 814 CI->replaceAllUsesWith(NewCall); 815 CI->eraseFromParent(); 816 return; 817 } 818 819 case Intrinsic::masked_load: 820 case Intrinsic::masked_store: { 821 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 822 CI->arg_operands().end()); 823 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args)); 824 CI->eraseFromParent(); 825 return; 826 } 827 } 828 } 829 830 // This tests each Function to determine if it needs upgrading. When we find 831 // one we are interested in, we then upgrade all calls to reflect the new 832 // function. 833 void llvm::UpgradeCallsToIntrinsic(Function* F) { 834 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 835 836 // Upgrade the function and check if it is a totaly new function. 837 Function *NewFn; 838 if (UpgradeIntrinsicFunction(F, NewFn)) { 839 // Replace all uses to the old function with the new one if necessary. 840 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); 841 UI != UE;) { 842 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 843 UpgradeIntrinsicCall(CI, NewFn); 844 } 845 // Remove old function, no longer used, from the module. 846 F->eraseFromParent(); 847 } 848 } 849 850 void llvm::UpgradeInstWithTBAATag(Instruction *I) { 851 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); 852 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); 853 // Check if the tag uses struct-path aware TBAA format. 854 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3) 855 return; 856 857 if (MD->getNumOperands() == 3) { 858 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)}; 859 MDNode *ScalarType = MDNode::get(I->getContext(), Elts); 860 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 861 Metadata *Elts2[] = {ScalarType, ScalarType, 862 ConstantAsMetadata::get(Constant::getNullValue( 863 Type::getInt64Ty(I->getContext()))), 864 MD->getOperand(2)}; 865 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); 866 } else { 867 // Create a MDNode <MD, MD, offset 0> 868 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue( 869 Type::getInt64Ty(I->getContext())))}; 870 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); 871 } 872 } 873 874 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 875 Instruction *&Temp) { 876 if (Opc != Instruction::BitCast) 877 return nullptr; 878 879 Temp = nullptr; 880 Type *SrcTy = V->getType(); 881 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 882 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 883 LLVMContext &Context = V->getContext(); 884 885 // We have no information about target data layout, so we assume that 886 // the maximum pointer size is 64bit. 887 Type *MidTy = Type::getInt64Ty(Context); 888 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 889 890 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 891 } 892 893 return nullptr; 894 } 895 896 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 897 if (Opc != Instruction::BitCast) 898 return nullptr; 899 900 Type *SrcTy = C->getType(); 901 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 902 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 903 LLVMContext &Context = C->getContext(); 904 905 // We have no information about target data layout, so we assume that 906 // the maximum pointer size is 64bit. 907 Type *MidTy = Type::getInt64Ty(Context); 908 909 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 910 DestTy); 911 } 912 913 return nullptr; 914 } 915 916 /// Check the debug info version number, if it is out-dated, drop the debug 917 /// info. Return true if module is modified. 918 bool llvm::UpgradeDebugInfo(Module &M) { 919 unsigned Version = getDebugMetadataVersionFromModule(M); 920 if (Version == DEBUG_METADATA_VERSION) 921 return false; 922 923 bool RetCode = StripDebugInfo(M); 924 if (RetCode) { 925 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 926 M.getContext().diagnose(DiagVersion); 927 } 928 return RetCode; 929 } 930 931 void llvm::UpgradeMDStringConstant(std::string &String) { 932 const std::string OldPrefix = "llvm.vectorizer."; 933 if (String == "llvm.vectorizer.unroll") { 934 String = "llvm.loop.interleave.count"; 935 } else if (String.find(OldPrefix) == 0) { 936 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize."); 937 } 938 } 939