1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include <cstring> 31 using namespace llvm; 32 33 // Upgrade the declarations of the SSE4.1 functions whose arguments have 34 // changed their type from v4f32 to v2i64. 35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 36 Function *&NewFn) { 37 // Check whether this is an old version of the function, which received 38 // v4f32 arguments. 39 Type *Arg0Type = F->getFunctionType()->getParamType(0); 40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 41 return false; 42 43 // Yes, it's old, replace it with new version. 44 F->setName(F->getName() + ".old"); 45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 46 return true; 47 } 48 49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 50 // arguments have changed their type from i32 to i8. 
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Examine F and decide whether it is a deprecated intrinsic declaration.
// Returns true if an upgrade is required. On success NewFn is either the
// replacement declaration, or null when no new declaration is needed and the
// individual call sites themselves must be rewritten (handled by
// UpgradeIntrinsicCall below).
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  // Dispatch on the first character after the "llvm." prefix to keep the
  // string comparisons below cheap for the common (non-deprecated) case.
  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }

  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        F->setName(Name + ".old");
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'x': {
    // These intrinsics no longer have a declaration-level replacement; they
    // are expanded inline at each call site (NewFn stays null and the work
    // happens in UpgradeIntrinsicCall).
    if (Name.startswith("x86.sse2.pcmpeq.") ||
        Name.startswith("x86.sse2.pcmpgt.") ||
        Name.startswith("x86.avx2.pcmpeq.") ||
        Name.startswith("x86.avx2.pcmpgt.") ||
        Name.startswith("x86.avx.vpermil.") ||
        Name == "x86.avx.vinsertf128.pd.256" ||
        Name == "x86.avx.vinsertf128.ps.256" ||
        Name == "x86.avx.vinsertf128.si.256" ||
        Name == "x86.avx2.vinserti128" ||
        Name == "x86.avx.vextractf128.pd.256" ||
        Name == "x86.avx.vextractf128.ps.256" ||
        Name == "x86.avx.vextractf128.si.256" ||
        Name == "x86.avx2.vextracti128" ||
        Name == "x86.avx.movnt.dq.256" ||
        Name == "x86.avx.movnt.pd.256" ||
        Name == "x86.avx.movnt.ps.256" ||
        Name == "x86.sse42.crc32.64.8" ||
        Name == "x86.avx.vbroadcast.ss" ||
        Name == "x86.avx.vbroadcast.ss.256" ||
        Name == "x86.avx.vbroadcast.sd.256" ||
        Name == "x86.sse2.psll.dq" ||
        Name == "x86.sse2.psrl.dq" ||
        Name == "x86.avx2.psll.dq" ||
        Name == "x86.avx2.psrl.dq" ||
        Name == "x86.sse2.psll.dq.bs" ||
        Name == "x86.sse2.psrl.dq.bs" ||
        Name == "x86.avx2.psll.dq.bs" ||
        Name == "x86.avx2.psrl.dq.bs" ||
        Name == "x86.sse41.pblendw" ||
        Name == "x86.sse41.blendpd" ||
        Name == "x86.sse41.blendps" ||
        Name == "x86.avx.blend.pd.256" ||
        Name == "x86.avx.blend.ps.256" ||
        Name == "x86.avx2.pblendw" ||
        Name == "x86.avx2.pblendd.128" ||
        Name == "x86.avx2.pblendd.256" ||
        Name == "x86.avx2.vbroadcasti128" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
      NewFn = nullptr;
      return true;
    }
    // SSE4.1 ptest functions may have an old signature.
    if (Name.startswith("x86.sse41.ptest")) {
      if (Name == "x86.sse41.ptestc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
      if (Name == "x86.sse41.ptestz")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
      if (Name == "x86.sse41.ptestnzc")
        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
    }
    // Several blend and other instructions with masks used the wrong number of
    // bits.
    if (Name == "x86.sse41.insertps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                              NewFn);
    if (Name == "x86.sse41.dppd")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                              NewFn);
    if (Name == "x86.sse41.dpps")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                              NewFn);
    if (Name == "x86.sse41.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                              NewFn);
    if (Name == "x86.avx.dp.ps.256")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                              NewFn);
    if (Name == "x86.avx2.mpsadbw")
      return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                              NewFn);

    // frcz.ss/sd may need to have an argument dropped
    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_ss);
      return true;
    }
    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::x86_xop_vfrcz_sd);
      return true;
    }
    // Fix the FMA4 intrinsics to remove the 4
    if (Name.startswith("x86.fma4.")) {
      F->setName("llvm.x86.fma" + Name.substr(8));
      NewFn = F;
      return true;
    }
    break;
  }
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

// Public entry point: detect whether F needs upgrading (see
// UpgradeIntrinsicFunction1) and refresh the intrinsic's attribute list
// either way.
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes.  This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}

// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
                                         unsigned Shift) {
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;

  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
                             "cast");
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    SmallVector<Constant*, 32> Idxs;
    // 256-bit version is split into two 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs.push_back(Builder.getInt32(Idx + l));
      }

    Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
                               "cast");
}

// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
// When NewFn is null the call is expanded inline into equivalent IR; in
// either case CI itself is erased.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);

  assert(F && "Intrinsic call is not direct?");

  if (!NewFn) {
    // Get the Function's name.
    StringRef Name = F->getName();

    // Replacement value for CI. Every branch below either assigns Rep or
    // erases CI and returns before Rep is read.
    Value *Rep;
    // Upgrade packed integer vector compares intrinsics to compare instructions
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
                                 "pcmpeq");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
                                  "pcmpgt");
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
               Name == "llvm.x86.avx.movnt.ps.256" ||
               Name == "llvm.x86.avx.movnt.pd.256") {
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      // Lower the non-temporal store intrinsic to a plain store carrying
      // !nontemporal metadata (value 1).
      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
      Elts.push_back(
          ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
                                        "cast");
      StoreInst *SI = Builder.CreateStore(Arg1, BC);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);
      SI->setAlignment(16);

      // Remove intrinsic.
      CI->eraseFromParent();
      return;
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      // The comparison condition moved from the intrinsic name into an i8
      // immediate operand. Decode the element-type suffix first, then the
      // condition mnemonic.
      Intrinsic::ID intID;
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
      else
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
      unsigned Imm;
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
      Rep =
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      // The 64-bit variant is emulated with the 32-bit one: truncate the
      // first operand, then zero-extend the result back.
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                  Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      // Duplicate the loaded 128-bit value into both halves of the result.
      const int Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                        Idxs);
    } else if (Name == "llvm.x86.sse2.psll.dq") {
      // 128-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq") {
      // 128-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psll.dq") {
      // 256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psrl.dq") {
      // 256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
      // 128-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
      // 128-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
      // 256-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
      // 256-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift);
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name == "llvm.x86.sse41.blendpd" ||
               Name == "llvm.x86.sse41.blendps" ||
               Name == "llvm.x86.avx.blend.pd.256" ||
               Name == "llvm.x86.avx.blend.ps.256" ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name == "llvm.x86.avx2.pblendd.128" ||
               Name == "llvm.x86.avx2.pblendd.256") {
      // Lower blends to shufflevector: mask bit set selects the element from
      // the second operand, clear selects from the first. The immediate is
      // 8 bits wide, hence the i%8 wrap for wider vectors.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<Constant*, 16> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
        Idxs.push_back(Builder.getInt32(Idx));
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
    } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
               Name == "llvm.x86.avx.vinsertf128.ps.256" ||
               Name == "llvm.x86.avx.vinsertf128.si.256" ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant*, 8> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs.push_back(Builder.getInt32(i));
      }
      Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      SmallVector<Constant*, 8> Idxs2;
      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i) {
        unsigned Idx = Imm ? i : (i + NumElts);
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts / 2) : i;
        Idxs2.push_back(Builder.getInt32(Idx));
      }
      Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
    } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
               Name == "llvm.x86.avx.vextractf128.ps.256" ||
               Name == "llvm.x86.avx.vextractf128.si.256" ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm & 1;

      // Get indexes for either the high half or low half of the input vector.
      SmallVector<Constant*, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts) : i;
        Idxs[i] = Builder.getInt32(Idx);
      }

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    } else {
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
        PD256 = true;
      else if (Name == "llvm.x86.avx.vpermil.pd")
        PD128 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps.256")
        PS256 = true;
      else if (Name == "llvm.x86.avx.vpermil.ps")
        PS128 = true;

      if (PD256 || PD128 || PS256 || PS128) {
        // vpermil becomes a single-source shuffle whose indices are decoded
        // from the immediate: 1 selector bit per f64 element, 2 bits per f32
        // element, applied per 128-bit lane for the 256-bit forms.
        Value *Op0 = CI->getArgOperand(0);
        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
        SmallVector<Constant*, 8> Idxs;

        if (PD128)
          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
        else if (PD256)
          for (unsigned l = 0; l != 4; l+=2)
            for (unsigned i = 0; i != 2; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
        else if (PS128)
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
        else if (PS256)
          for (unsigned l = 0; l != 8; l+=4)
            for (unsigned i = 0; i != 4; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
        else
          llvm_unreachable("Unexpected function");

        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
      } else {
        llvm_unreachable("Unknown function for CallInst upgrade.");
      }
    }

    CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  // A replacement declaration exists: rebuild the call against NewFn,
  // adapting the argument list where the signature changed.
  std::string Name = CI->getName();
  if (!Name.empty())
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
  default:
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    // The upgraded ctlz/cttz take a second i1 operand (is_zero_undef per the
    // LangRef); pass false to keep the old defined-at-zero behavior.
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The two-argument form dropped its first operand; forward only the
    // second one.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
    return;

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  }
  }
}

// This tests each Function to determine if it needs upgrading. When we find
// one we are interested in, we then upgrade all calls to reflect the new
// function.
void llvm::UpgradeCallsToIntrinsic(Function* F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Upgrade the function and check if it is a totally new function.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all uses to the old function with the new one if necessary.
    // Note: the iterator is advanced before the call because
    // UpgradeIntrinsicCall erases the use it is handed.
    for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
         UI != UE;) {
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);
    }
    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

// Convert I's scalar TBAA tag from the legacy format to the struct-path
// aware format <base, access, offset [, const]>. Tags already in the new
// format are left untouched.
void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
    return;

  if (MD->getNumOperands() == 3) {
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
                         MD->getOperand(2)};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
  } else {
    // Create a MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
  }
}

// Upgrade a bitcast between pointers of different address spaces (which is
// no longer a legal bitcast) into a ptrtoint/inttoptr pair. Returns the
// replacement instruction and sets Temp to the intermediate ptrtoint, or
// returns null (with Temp null) when no upgrade applies.
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

// Constant-expression analogue of UpgradeBitCastInst: rewrite a cross
// address-space bitcast constant expression as ptrtoint+inttoptr, or return
// null when no upgrade applies.
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION)
    return false;

  bool RetCode = StripDebugInfo(M);
  if (RetCode) {
    // Tell the user (via the module's diagnostic handler) that stale debug
    // info was dropped.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return RetCode;
}

// Rewrite deprecated "llvm.vectorizer.*" metadata strings to their
// "llvm.loop.*" replacements, in place. Strings with neither form are left
// unchanged.
void llvm::UpgradeMDStringConstant(std::string &String) {
  const std::string OldPrefix = "llvm.vectorizer.";
  if (String == "llvm.vectorizer.unroll") {
    String = "llvm.loop.interleave.count";
  } else if (String.find(OldPrefix) == 0) {
    String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
  }
}