1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/IR/CFG.h" 18 #include "llvm/IR/CallSite.h" 19 #include "llvm/IR/Constants.h" 20 #include "llvm/IR/DIBuilder.h" 21 #include "llvm/IR/DebugInfo.h" 22 #include "llvm/IR/DiagnosticInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/LLVMContext.h" 28 #include "llvm/IR/Module.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include <cstring> 31 using namespace llvm; 32 33 // Upgrade the declarations of the SSE4.1 functions whose arguments have 34 // changed their type from v4f32 to v2i64. 35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, 36 Function *&NewFn) { 37 // Check whether this is an old version of the function, which received 38 // v4f32 arguments. 39 Type *Arg0Type = F->getFunctionType()->getParamType(0); 40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 41 return false; 42 43 // Yes, it's old, replace it with new version. 44 F->setName(F->getName() + ".old"); 45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 46 return true; 47 } 48 49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 50 // arguments have changed their type from i32 to i8. 51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 52 Function *&NewFn) { 53 // Check that the last argument is an i32. 54 Type *LastArgType = F->getFunctionType()->getParamType( 55 F->getFunctionType()->getNumParams() - 1); 56 if (!LastArgType->isIntegerTy(32)) 57 return false; 58 59 // Move this function aside and map down. 60 F->setName(F->getName() + ".old"); 61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 62 return true; 63 } 64 65 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 66 assert(F && "Illegal to upgrade a non-existent Function."); 67 68 // Quickly eliminate it, if it's not a candidate. 69 StringRef Name = F->getName(); 70 if (Name.size() <= 8 || !Name.startswith("llvm.")) 71 return false; 72 Name = Name.substr(5); // Strip off "llvm." 73 74 switch (Name[0]) { 75 default: break; 76 case 'a': { 77 if (Name.startswith("arm.neon.vclz")) { 78 Type* args[2] = { 79 F->arg_begin()->getType(), 80 Type::getInt1Ty(F->getContext()) 81 }; 82 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 83 // the end of the name. Change name from llvm.arm.neon.vclz.* to 84 // llvm.ctlz.* 85 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 86 NewFn = Function::Create(fType, F->getLinkage(), 87 "llvm.ctlz." + Name.substr(14), F->getParent()); 88 return true; 89 } 90 if (Name.startswith("arm.neon.vcnt")) { 91 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 92 F->arg_begin()->getType()); 93 return true; 94 } 95 break; 96 } 97 case 'c': { 98 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 99 F->setName(Name + ".old"); 100 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 101 F->arg_begin()->getType()); 102 return true; 103 } 104 if (Name.startswith("cttz.") && F->arg_size() == 1) { 105 F->setName(Name + ".old"); 106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 107 F->arg_begin()->getType()); 108 return true; 109 } 110 break; 111 } 112 113 case 'o': 114 // We only need to change the name to match the mangling including the 115 // address space. 116 if (F->arg_size() == 2 && Name.startswith("objectsize.")) { 117 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 118 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 119 F->setName(Name + ".old"); 120 NewFn = Intrinsic::getDeclaration(F->getParent(), 121 Intrinsic::objectsize, Tys); 122 return true; 123 } 124 } 125 break; 126 127 case 'x': { 128 if (Name.startswith("x86.sse2.pcmpeq.") || 129 Name.startswith("x86.sse2.pcmpgt.") || 130 Name.startswith("x86.avx2.pcmpeq.") || 131 Name.startswith("x86.avx2.pcmpgt.") || 132 Name.startswith("x86.avx2.vbroadcast") || 133 Name.startswith("x86.avx2.pbroadcast") || 134 Name.startswith("x86.avx.vpermil.") || 135 Name == "x86.avx.vinsertf128.pd.256" || 136 Name == "x86.avx.vinsertf128.ps.256" || 137 Name == "x86.avx.vinsertf128.si.256" || 138 Name == "x86.avx2.vinserti128" || 139 Name == "x86.avx.vextractf128.pd.256" || 140 Name == "x86.avx.vextractf128.ps.256" || 141 Name == "x86.avx.vextractf128.si.256" || 142 Name == "x86.avx2.vextracti128" || 143 Name == "x86.avx.movnt.dq.256" || 144 Name == "x86.avx.movnt.pd.256" || 145 Name == "x86.avx.movnt.ps.256" || 146 Name == "x86.sse42.crc32.64.8" || 147 Name == "x86.avx.vbroadcast.ss" || 148 Name == "x86.avx.vbroadcast.ss.256" || 149 Name == "x86.avx.vbroadcast.sd.256" || 150 Name == "x86.sse2.psll.dq" || 151 Name == "x86.sse2.psrl.dq" || 152 Name == "x86.avx2.psll.dq" || 153 Name == "x86.avx2.psrl.dq" || 154 Name == "x86.sse2.psll.dq.bs" || 155 Name == "x86.sse2.psrl.dq.bs" || 156 Name == "x86.avx2.psll.dq.bs" || 157 Name == "x86.avx2.psrl.dq.bs" || 158 Name == "x86.sse41.pblendw" || 159 Name == "x86.sse41.blendpd" || 160 Name == "x86.sse41.blendps" || 161 Name == "x86.avx.blend.pd.256" || 162 Name == "x86.avx.blend.ps.256" || 163 Name == "x86.avx2.pblendw" || 164 Name == "x86.avx2.pblendd.128" || 165 Name == "x86.avx2.pblendd.256" || 166 Name == "x86.avx2.vbroadcasti128" || 167 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { 168 NewFn = nullptr; 169 return true; 170 } 171 // SSE4.1 ptest functions may have an old signature. 172 if (Name.startswith("x86.sse41.ptest")) { 173 if (Name == "x86.sse41.ptestc") 174 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); 175 if (Name == "x86.sse41.ptestz") 176 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); 177 if (Name == "x86.sse41.ptestnzc") 178 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 179 } 180 // Several blend and other instructions with masks used the wrong number of 181 // bits. 182 if (Name == "x86.sse41.insertps") 183 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 184 NewFn); 185 if (Name == "x86.sse41.dppd") 186 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 187 NewFn); 188 if (Name == "x86.sse41.dpps") 189 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 190 NewFn); 191 if (Name == "x86.sse41.mpsadbw") 192 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 193 NewFn); 194 if (Name == "x86.avx.dp.ps.256") 195 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 196 NewFn); 197 if (Name == "x86.avx2.mpsadbw") 198 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 199 NewFn); 200 201 // frcz.ss/sd may need to have an argument dropped 202 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { 203 F->setName(Name + ".old"); 204 NewFn = Intrinsic::getDeclaration(F->getParent(), 205 Intrinsic::x86_xop_vfrcz_ss); 206 return true; 207 } 208 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { 209 F->setName(Name + ".old"); 210 NewFn = Intrinsic::getDeclaration(F->getParent(), 211 Intrinsic::x86_xop_vfrcz_sd); 212 return true; 213 } 214 // Fix the FMA4 intrinsics to remove the 4 215 if (Name.startswith("x86.fma4.")) { 216 F->setName("llvm.x86.fma" + Name.substr(8)); 217 NewFn = F; 218 return true; 219 } 220 break; 221 } 222 } 223 224 // This may not belong here. This function is effectively being overloaded 225 // to both detect an intrinsic which needs upgrading, and to provide the 226 // upgraded form of the intrinsic. We should perhaps have two separate 227 // functions for this. 228 return false; 229 } 230 231 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 232 NewFn = nullptr; 233 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 234 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 235 236 // Upgrade intrinsic attributes. This does not change the function. 237 if (NewFn) 238 F = NewFn; 239 if (Intrinsic::ID id = F->getIntrinsicID()) 240 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 241 return Upgraded; 242 } 243 244 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 245 // Nothing to do yet. 246 return false; 247 } 248 249 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them 250 // to byte shuffles. 251 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 252 Value *Op, unsigned NumLanes, 253 unsigned Shift) { 254 // Each lane is 16 bytes. 255 unsigned NumElts = NumLanes * 16; 256 257 // Bitcast from a 64-bit element type to a byte element type. 258 Op = Builder.CreateBitCast(Op, 259 VectorType::get(Type::getInt8Ty(C), NumElts), 260 "cast"); 261 // We'll be shuffling in zeroes. 262 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 263 264 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 265 // we'll just return the zero vector. 266 if (Shift < 16) { 267 SmallVector<Constant*, 32> Idxs; 268 // 256-bit version is split into two 16-byte lanes. 269 for (unsigned l = 0; l != NumElts; l += 16) 270 for (unsigned i = 0; i != 16; ++i) { 271 unsigned Idx = NumElts + i - Shift; 272 if (Idx < NumElts) 273 Idx -= NumElts - 16; // end of lane, switch operand. 274 Idxs.push_back(Builder.getInt32(Idx + l)); 275 } 276 277 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs)); 278 } 279 280 // Bitcast back to a 64-bit element type. 281 return Builder.CreateBitCast(Res, 282 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 283 "cast"); 284 } 285 286 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them 287 // to byte shuffles. 288 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, 289 Value *Op, unsigned NumLanes, 290 unsigned Shift) { 291 // Each lane is 16 bytes. 292 unsigned NumElts = NumLanes * 16; 293 294 // Bitcast from a 64-bit element type to a byte element type. 295 Op = Builder.CreateBitCast(Op, 296 VectorType::get(Type::getInt8Ty(C), NumElts), 297 "cast"); 298 // We'll be shuffling in zeroes. 299 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0)); 300 301 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 302 // we'll just return the zero vector. 303 if (Shift < 16) { 304 SmallVector<Constant*, 32> Idxs; 305 // 256-bit version is split into two 16-byte lanes. 306 for (unsigned l = 0; l != NumElts; l += 16) 307 for (unsigned i = 0; i != 16; ++i) { 308 unsigned Idx = i + Shift; 309 if (Idx >= 16) 310 Idx += NumElts - 16; // end of lane, switch operand. 311 Idxs.push_back(Builder.getInt32(Idx + l)); 312 } 313 314 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs)); 315 } 316 317 // Bitcast back to a 64-bit element type. 318 return Builder.CreateBitCast(Res, 319 VectorType::get(Type::getInt64Ty(C), 2*NumLanes), 320 "cast"); 321 } 322 323 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 324 // upgraded intrinsic. All argument and return casting must be provided in 325 // order to seamlessly integrate with existing context. 326 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 327 Function *F = CI->getCalledFunction(); 328 LLVMContext &C = CI->getContext(); 329 IRBuilder<> Builder(C); 330 Builder.SetInsertPoint(CI->getParent(), CI); 331 332 assert(F && "Intrinsic call is not direct?"); 333 334 if (!NewFn) { 335 // Get the Function's name. 336 StringRef Name = F->getName(); 337 338 Value *Rep; 339 // Upgrade packed integer vector compares intrinsics to compare instructions 340 if (Name.startswith("llvm.x86.sse2.pcmpeq.") || 341 Name.startswith("llvm.x86.avx2.pcmpeq.")) { 342 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1), 343 "pcmpeq"); 344 // need to sign extend since icmp returns vector of i1 345 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 346 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") || 347 Name.startswith("llvm.x86.avx2.pcmpgt.")) { 348 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1), 349 "pcmpgt"); 350 // need to sign extend since icmp returns vector of i1 351 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 352 } else if (Name == "llvm.x86.avx.movnt.dq.256" || 353 Name == "llvm.x86.avx.movnt.ps.256" || 354 Name == "llvm.x86.avx.movnt.pd.256") { 355 IRBuilder<> Builder(C); 356 Builder.SetInsertPoint(CI->getParent(), CI); 357 358 Module *M = F->getParent(); 359 SmallVector<Metadata *, 1> Elts; 360 Elts.push_back( 361 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 362 MDNode *Node = MDNode::get(C, Elts); 363 364 Value *Arg0 = CI->getArgOperand(0); 365 Value *Arg1 = CI->getArgOperand(1); 366 367 // Convert the type of the pointer to a pointer to the stored type. 368 Value *BC = Builder.CreateBitCast(Arg0, 369 PointerType::getUnqual(Arg1->getType()), 370 "cast"); 371 StoreInst *SI = Builder.CreateStore(Arg1, BC); 372 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 373 SI->setAlignment(32); 374 375 // Remove intrinsic. 376 CI->eraseFromParent(); 377 return; 378 } else if (Name.startswith("llvm.x86.xop.vpcom")) { 379 Intrinsic::ID intID; 380 if (Name.endswith("ub")) 381 intID = Intrinsic::x86_xop_vpcomub; 382 else if (Name.endswith("uw")) 383 intID = Intrinsic::x86_xop_vpcomuw; 384 else if (Name.endswith("ud")) 385 intID = Intrinsic::x86_xop_vpcomud; 386 else if (Name.endswith("uq")) 387 intID = Intrinsic::x86_xop_vpcomuq; 388 else if (Name.endswith("b")) 389 intID = Intrinsic::x86_xop_vpcomb; 390 else if (Name.endswith("w")) 391 intID = Intrinsic::x86_xop_vpcomw; 392 else if (Name.endswith("d")) 393 intID = Intrinsic::x86_xop_vpcomd; 394 else if (Name.endswith("q")) 395 intID = Intrinsic::x86_xop_vpcomq; 396 else 397 llvm_unreachable("Unknown suffix"); 398 399 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" 400 unsigned Imm; 401 if (Name.startswith("lt")) 402 Imm = 0; 403 else if (Name.startswith("le")) 404 Imm = 1; 405 else if (Name.startswith("gt")) 406 Imm = 2; 407 else if (Name.startswith("ge")) 408 Imm = 3; 409 else if (Name.startswith("eq")) 410 Imm = 4; 411 else if (Name.startswith("ne")) 412 Imm = 5; 413 else if (Name.startswith("false")) 414 Imm = 6; 415 else if (Name.startswith("true")) 416 Imm = 7; 417 else 418 llvm_unreachable("Unknown condition"); 419 420 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 421 Rep = 422 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 423 Builder.getInt8(Imm)}); 424 } else if (Name == "llvm.x86.sse42.crc32.64.8") { 425 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 426 Intrinsic::x86_sse42_crc32_32_8); 427 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 428 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 429 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 430 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { 431 // Replace broadcasts with a series of insertelements. 432 Type *VecTy = CI->getType(); 433 Type *EltTy = VecTy->getVectorElementType(); 434 unsigned EltNum = VecTy->getVectorNumElements(); 435 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 436 EltTy->getPointerTo()); 437 Value *Load = Builder.CreateLoad(EltTy, Cast); 438 Type *I32Ty = Type::getInt32Ty(C); 439 Rep = UndefValue::get(VecTy); 440 for (unsigned I = 0; I < EltNum; ++I) 441 Rep = Builder.CreateInsertElement(Rep, Load, 442 ConstantInt::get(I32Ty, I)); 443 } else if (Name == "llvm.x86.avx2.vbroadcasti128") { 444 // Replace vbroadcasts with a vector shuffle. 445 Type *VT = VectorType::get(Type::getInt64Ty(C), 2); 446 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 447 PointerType::getUnqual(VT)); 448 Value *Load = Builder.CreateLoad(VT, Op); 449 const int Idxs[4] = { 0, 1, 0, 1 }; 450 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 451 Idxs); 452 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") || 453 Name.startswith("llvm.x86.avx2.vbroadcast")) { 454 // Replace vp?broadcasts with a vector shuffle. 455 Value *Op = CI->getArgOperand(0); 456 unsigned NumElts = CI->getType()->getVectorNumElements(); 457 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 458 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 459 Constant::getNullValue(MaskTy)); 460 } else if (Name == "llvm.x86.sse2.psll.dq") { 461 // 128-bit shift left specified in bits. 462 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 463 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 464 Shift / 8); // Shift is in bits. 465 } else if (Name == "llvm.x86.sse2.psrl.dq") { 466 // 128-bit shift right specified in bits. 467 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 468 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 469 Shift / 8); // Shift is in bits. 470 } else if (Name == "llvm.x86.avx2.psll.dq") { 471 // 256-bit shift left specified in bits. 472 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 473 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 474 Shift / 8); // Shift is in bits. 475 } else if (Name == "llvm.x86.avx2.psrl.dq") { 476 // 256-bit shift right specified in bits. 477 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 478 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 479 Shift / 8); // Shift is in bits. 480 } else if (Name == "llvm.x86.sse2.psll.dq.bs") { 481 // 128-bit shift left specified in bytes. 482 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 483 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 484 Shift); 485 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") { 486 // 128-bit shift right specified in bytes. 487 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 488 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1, 489 Shift); 490 } else if (Name == "llvm.x86.avx2.psll.dq.bs") { 491 // 256-bit shift left specified in bytes. 492 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 493 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 494 Shift); 495 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") { 496 // 256-bit shift right specified in bytes. 497 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 498 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2, 499 Shift); 500 } else if (Name == "llvm.x86.sse41.pblendw" || 501 Name == "llvm.x86.sse41.blendpd" || 502 Name == "llvm.x86.sse41.blendps" || 503 Name == "llvm.x86.avx.blend.pd.256" || 504 Name == "llvm.x86.avx.blend.ps.256" || 505 Name == "llvm.x86.avx2.pblendw" || 506 Name == "llvm.x86.avx2.pblendd.128" || 507 Name == "llvm.x86.avx2.pblendd.256") { 508 Value *Op0 = CI->getArgOperand(0); 509 Value *Op1 = CI->getArgOperand(1); 510 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 511 VectorType *VecTy = cast<VectorType>(CI->getType()); 512 unsigned NumElts = VecTy->getNumElements(); 513 514 SmallVector<Constant*, 16> Idxs; 515 for (unsigned i = 0; i != NumElts; ++i) { 516 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 517 Idxs.push_back(Builder.getInt32(Idx)); 518 } 519 520 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs)); 521 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" || 522 Name == "llvm.x86.avx.vinsertf128.ps.256" || 523 Name == "llvm.x86.avx.vinsertf128.si.256" || 524 Name == "llvm.x86.avx2.vinserti128") { 525 Value *Op0 = CI->getArgOperand(0); 526 Value *Op1 = CI->getArgOperand(1); 527 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 528 VectorType *VecTy = cast<VectorType>(CI->getType()); 529 unsigned NumElts = VecTy->getNumElements(); 530 531 // Mask off the high bits of the immediate value; hardware ignores those. 532 Imm = Imm & 1; 533 534 // Extend the second operand into a vector that is twice as big. 535 Value *UndefV = UndefValue::get(Op1->getType()); 536 SmallVector<Constant*, 8> Idxs; 537 for (unsigned i = 0; i != NumElts; ++i) { 538 Idxs.push_back(Builder.getInt32(i)); 539 } 540 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs)); 541 542 // Insert the second operand into the first operand. 543 544 // Note that there is no guarantee that instruction lowering will actually 545 // produce a vinsertf128 instruction for the created shuffles. In 546 // particular, the 0 immediate case involves no lane changes, so it can 547 // be handled as a blend. 548 549 // Example of shuffle mask for 32-bit elements: 550 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 551 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 552 553 SmallVector<Constant*, 8> Idxs2; 554 // The low half of the result is either the low half of the 1st operand 555 // or the low half of the 2nd operand (the inserted vector). 556 for (unsigned i = 0; i != NumElts / 2; ++i) { 557 unsigned Idx = Imm ? i : (i + NumElts); 558 Idxs2.push_back(Builder.getInt32(Idx)); 559 } 560 // The high half of the result is either the low half of the 2nd operand 561 // (the inserted vector) or the high half of the 1st operand. 562 for (unsigned i = NumElts / 2; i != NumElts; ++i) { 563 unsigned Idx = Imm ? (i + NumElts / 2) : i; 564 Idxs2.push_back(Builder.getInt32(Idx)); 565 } 566 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2)); 567 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" || 568 Name == "llvm.x86.avx.vextractf128.ps.256" || 569 Name == "llvm.x86.avx.vextractf128.si.256" || 570 Name == "llvm.x86.avx2.vextracti128") { 571 Value *Op0 = CI->getArgOperand(0); 572 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 573 VectorType *VecTy = cast<VectorType>(CI->getType()); 574 unsigned NumElts = VecTy->getNumElements(); 575 576 // Mask off the high bits of the immediate value; hardware ignores those. 577 Imm = Imm & 1; 578 579 // Get indexes for either the high half or low half of the input vector. 580 SmallVector<Constant*, 4> Idxs(NumElts); 581 for (unsigned i = 0; i != NumElts; ++i) { 582 unsigned Idx = Imm ? (i + NumElts) : i; 583 Idxs[i] = Builder.getInt32(Idx); 584 } 585 586 Value *UndefV = UndefValue::get(Op0->getType()); 587 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs)); 588 } else { 589 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; 590 if (Name == "llvm.x86.avx.vpermil.pd.256") 591 PD256 = true; 592 else if (Name == "llvm.x86.avx.vpermil.pd") 593 PD128 = true; 594 else if (Name == "llvm.x86.avx.vpermil.ps.256") 595 PS256 = true; 596 else if (Name == "llvm.x86.avx.vpermil.ps") 597 PS128 = true; 598 599 if (PD256 || PD128 || PS256 || PS128) { 600 Value *Op0 = CI->getArgOperand(0); 601 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 602 SmallVector<Constant*, 8> Idxs; 603 604 if (PD128) 605 for (unsigned i = 0; i != 2; ++i) 606 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1)); 607 else if (PD256) 608 for (unsigned l = 0; l != 4; l+=2) 609 for (unsigned i = 0; i != 2; ++i) 610 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l)); 611 else if (PS128) 612 for (unsigned i = 0; i != 4; ++i) 613 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3)); 614 else if (PS256) 615 for (unsigned l = 0; l != 8; l+=4) 616 for (unsigned i = 0; i != 4; ++i) 617 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l)); 618 else 619 llvm_unreachable("Unexpected function"); 620 621 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs)); 622 } else { 623 llvm_unreachable("Unknown function for CallInst upgrade."); 624 } 625 } 626 627 CI->replaceAllUsesWith(Rep); 628 CI->eraseFromParent(); 629 return; 630 } 631 632 std::string Name = CI->getName(); 633 if (!Name.empty()) 634 CI->setName(Name + ".old"); 635 636 switch (NewFn->getIntrinsicID()) { 637 default: 638 llvm_unreachable("Unknown function for CallInst upgrade."); 639 640 case Intrinsic::ctlz: 641 case Intrinsic::cttz: 642 assert(CI->getNumArgOperands() == 1 && 643 "Mismatch between function args and call args"); 644 CI->replaceAllUsesWith(Builder.CreateCall( 645 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name)); 646 CI->eraseFromParent(); 647 return; 648 649 case Intrinsic::objectsize: 650 CI->replaceAllUsesWith(Builder.CreateCall( 651 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); 652 CI->eraseFromParent(); 653 return; 654 655 case Intrinsic::ctpop: { 656 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)})); 657 CI->eraseFromParent(); 658 return; 659 } 660 661 case Intrinsic::x86_xop_vfrcz_ss: 662 case Intrinsic::x86_xop_vfrcz_sd: 663 CI->replaceAllUsesWith( 664 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name)); 665 CI->eraseFromParent(); 666 return; 667 668 case Intrinsic::x86_sse41_ptestc: 669 case Intrinsic::x86_sse41_ptestz: 670 case Intrinsic::x86_sse41_ptestnzc: { 671 // The arguments for these intrinsics used to be v4f32, and changed 672 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 673 // So, the only thing required is a bitcast for both arguments. 674 // First, check the arguments have the old type. 675 Value *Arg0 = CI->getArgOperand(0); 676 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 677 return; 678 679 // Old intrinsic, add bitcasts 680 Value *Arg1 = CI->getArgOperand(1); 681 682 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 683 684 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 685 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 686 687 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name); 688 CI->replaceAllUsesWith(NewCall); 689 CI->eraseFromParent(); 690 return; 691 } 692 693 case Intrinsic::x86_sse41_insertps: 694 case Intrinsic::x86_sse41_dppd: 695 case Intrinsic::x86_sse41_dpps: 696 case Intrinsic::x86_sse41_mpsadbw: 697 case Intrinsic::x86_avx_dp_ps_256: 698 case Intrinsic::x86_avx2_mpsadbw: { 699 // Need to truncate the last argument from i32 to i8 -- this argument models 700 // an inherently 8-bit immediate operand to these x86 instructions. 701 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 702 CI->arg_operands().end()); 703 704 // Replace the last argument with a trunc. 705 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 706 707 CallInst *NewCall = Builder.CreateCall(NewFn, Args); 708 CI->replaceAllUsesWith(NewCall); 709 CI->eraseFromParent(); 710 return; 711 } 712 } 713 } 714 715 // This tests each Function to determine if it needs upgrading. When we find 716 // one we are interested in, we then upgrade all calls to reflect the new 717 // function. 718 void llvm::UpgradeCallsToIntrinsic(Function* F) { 719 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 720 721 // Upgrade the function and check if it is a totaly new function. 722 Function *NewFn; 723 if (UpgradeIntrinsicFunction(F, NewFn)) { 724 // Replace all uses to the old function with the new one if necessary. 725 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); 726 UI != UE;) { 727 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 728 UpgradeIntrinsicCall(CI, NewFn); 729 } 730 // Remove old function, no longer used, from the module. 731 F->eraseFromParent(); 732 } 733 } 734 735 void llvm::UpgradeInstWithTBAATag(Instruction *I) { 736 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); 737 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); 738 // Check if the tag uses struct-path aware TBAA format. 739 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3) 740 return; 741 742 if (MD->getNumOperands() == 3) { 743 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)}; 744 MDNode *ScalarType = MDNode::get(I->getContext(), Elts); 745 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 746 Metadata *Elts2[] = {ScalarType, ScalarType, 747 ConstantAsMetadata::get(Constant::getNullValue( 748 Type::getInt64Ty(I->getContext()))), 749 MD->getOperand(2)}; 750 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); 751 } else { 752 // Create a MDNode <MD, MD, offset 0> 753 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue( 754 Type::getInt64Ty(I->getContext())))}; 755 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); 756 } 757 } 758 759 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 760 Instruction *&Temp) { 761 if (Opc != Instruction::BitCast) 762 return nullptr; 763 764 Temp = nullptr; 765 Type *SrcTy = V->getType(); 766 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 767 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 768 LLVMContext &Context = V->getContext(); 769 770 // We have no information about target data layout, so we assume that 771 // the maximum pointer size is 64bit. 772 Type *MidTy = Type::getInt64Ty(Context); 773 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 774 775 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 776 } 777 778 return nullptr; 779 } 780 781 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 782 if (Opc != Instruction::BitCast) 783 return nullptr; 784 785 Type *SrcTy = C->getType(); 786 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 787 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 788 LLVMContext &Context = C->getContext(); 789 790 // We have no information about target data layout, so we assume that 791 // the maximum pointer size is 64bit. 792 Type *MidTy = Type::getInt64Ty(Context); 793 794 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 795 DestTy); 796 } 797 798 return nullptr; 799 } 800 801 /// Check the debug info version number, if it is out-dated, drop the debug 802 /// info. Return true if module is modified. 803 bool llvm::UpgradeDebugInfo(Module &M) { 804 unsigned Version = getDebugMetadataVersionFromModule(M); 805 if (Version == DEBUG_METADATA_VERSION) 806 return false; 807 808 bool RetCode = StripDebugInfo(M); 809 if (RetCode) { 810 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 811 M.getContext().diagnose(DiagVersion); 812 } 813 return RetCode; 814 } 815 816 void llvm::UpgradeMDStringConstant(std::string &String) { 817 const std::string OldPrefix = "llvm.vectorizer."; 818 if (String == "llvm.vectorizer.unroll") { 819 String = "llvm.loop.interleave.count"; 820 } else if (String.find(OldPrefix) == 0) { 821 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize."); 822 } 823 } 824