1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/IR/Constants.h" 19 #include "llvm/IR/DIBuilder.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/DiagnosticInfo.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/IRBuilder.h" 24 #include "llvm/IR/Instruction.h" 25 #include "llvm/IR/LLVMContext.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/IR/Verifier.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/Regex.h" 30 #include <cstring> 31 using namespace llvm; 32 33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); } 34 35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have 36 // changed their type from v4f32 to v2i64. 37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID, 38 Function *&NewFn) { 39 // Check whether this is an old version of the function, which received 40 // v4f32 arguments. 41 Type *Arg0Type = F->getFunctionType()->getParamType(0); 42 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 43 return false; 44 45 // Yes, it's old, replace it with new version. 46 rename(F); 47 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 48 return true; 49 } 50 51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 52 // arguments have changed their type from i32 to i8. 
// Returns true and stores the declaration of intrinsic IID in NewFn when the
// last parameter of F is an i32; returns false and leaves NewFn untouched
// otherwise.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
     F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

// Returns true if Name matches one of the x86 intrinsic names (or name
// prefixes) listed below. The caller passes Name with the leading "llvm.x86."
// already stripped. F is only consulted for the "xop.vpcom" prefix, which is
// matched only when the function takes exactly two arguments.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name=="ssse3.pabs.b.128" || // Added in 6.0
      Name=="ssse3.pabs.w.128" || // Added in 6.0
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name == "avx512.mask.add.pd.128" || // Added in 4.0
      Name == "avx512.mask.add.pd.256" || // Added in 4.0
      Name == "avx512.mask.add.ps.128" || // Added in 4.0
      Name == "avx512.mask.add.ps.256" || // Added in 4.0
      Name == "avx512.mask.div.pd.128" || // Added in 4.0
      Name == "avx512.mask.div.pd.256" || // Added in 4.0
      Name == "avx512.mask.div.ps.128" || // Added in 4.0
      Name == "avx512.mask.div.ps.256" || // Added in 4.0
      Name == "avx512.mask.mul.pd.128" || // Added in 4.0
      Name == "avx512.mask.mul.pd.256" || // Added in 4.0
      Name == "avx512.mask.mul.ps.128" || // Added in 4.0
      Name == "avx512.mask.mul.ps.256" || // Added in 4.0
      Name == "avx512.mask.sub.pd.128" || // Added in 4.0
      Name == "avx512.mask.sub.pd.256" || // Added in 4.0
      Name == "avx512.mask.sub.ps.128" || // Added in 4.0
      Name == "avx512.mask.sub.ps.256" || // Added in 4.0
      Name == "avx512.mask.max.pd.128" || // Added in 5.0
      Name == "avx512.mask.max.pd.256" || // Added in 5.0
      Name == "avx512.mask.max.ps.128" || // Added in 5.0
      Name == "avx512.mask.max.ps.256" || // Added in 5.0
      Name == "avx512.mask.min.pd.128" || // Added in 5.0
      Name == "avx512.mask.min.pd.256" || // Added in 5.0
      Name == "avx512.mask.min.ps.128" || // Added in 5.0
      Name == "avx512.mask.min.ps.256" || // Added in 5.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      (Name.startswith("xop.vpcom") && // Added in 3.2
       F->arg_size() == 2) ||
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("sse2.pavg") || // Added in 6.0
      Name.startswith("avx2.pavg") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

// Decide whether the x86 intrinsic declaration F needs upgrading. Name is the
// function name with the leading "llvm." already stripped by the caller.
//
// Returns true when an upgrade is needed. NewFn is set to nullptr when the
// name is matched by ShouldUpgradeX86Intrinsic (no replacement declaration is
// created here), and to the replacement declaration in the remaining cases.
// Returns false, without touching NewFn, when nothing matches.
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    // "sse41.ptest" is 11 characters long; what follows selects the variant.
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      // Pick the replacement from the index operand's element and total size.
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  return false;
}

// Inspect the name and signature of F and decide whether it is an intrinsic
// declaration that needs upgrading.
//
// Returns true when an upgrade is needed. On a true return NewFn is either the
// replacement function, or nullptr for the cases below that assign nullptr
// explicitly. Returns false when F is not a candidate.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  // Dispatch on the first character after "llvm." to limit the comparisons.
  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      //  llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      // substr(14) drops "arm.neon.vclz." and keeps the type suffix.
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};

      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };

      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      // The parameter count of the old declaration selects the table entry.
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    break;
  }

  case 'c': {
    // Only the one-argument forms of ctlz/cttz are upgraded.
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    // Only the four-argument form of dbg.value is upgraded.
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'i':
  case 'l': {
    // The lifetime.* and invariant.* intrinsics share this code; they are
    // upgraded only when the current name differs from the name computed for
    // the pointer parameter type.
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }

    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;

      auto Args = F->getFunctionType()->params();
      // lifetime.end reads parameter 1, invariant.end reads parameter 2.
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    break;
  }
  case 'm': {
    // Each masked.* intrinsic is upgraded only when its current name differs
    // from the name computed for its overloaded types.
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space
    if (Name.startswith("masked.gather.")) {
      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_gather, Tys);
        return true;
      }
    }
    if (Name.startswith("masked.scatter.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = {Args[0], Args[1]};
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_scatter, Tys);
        return true;
      }
    }
    break;
  }
  case 'n': {
    if (Name.startswith("nvvm.")) {
      Name = Name.substr(5);

      // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
      Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
                              .Cases("brev32", "brev64", Intrinsic::bitreverse)
                              .Case("clz.i", Intrinsic::ctlz)
                              .Case("popc.i", Intrinsic::ctpop)
                              .Default(Intrinsic::not_intrinsic);
      if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                          {F->getReturnType()});
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = StringSwitch<bool>(Name)
                        .Cases("abs.i", "abs.ll", true)
                        .Cases("clz.ll", "popc.ll", "h2f", true)
                        .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
                        .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
                        .Default(false);
      if (Expand) {
        NewFn = nullptr;
        return true;
      }
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.startswith("objectsize.")) {
      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
      // A two-argument declaration is always upgraded, whatever its name.
      if (F->arg_size() == 2 ||
          F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
                                          Tys);
        return true;
      }
    }
    break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'x':
    // No break needed: this is the last case, so a miss continues to the
    // remangling step below.
    if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }
  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != None) {
    NewFn = Result.getValue();
    return true;
  }

  //  This may not belong here. This function is effectively being overloaded
  //  to both detect an intrinsic which needs upgrading, and to provide the
  //  upgraded form of the intrinsic. We should perhaps have two separate
  //  functions for this.
  return false;
}

// Public entry point: reset NewFn, run UpgradeIntrinsicFunction1 on F, then
// refresh the intrinsic attributes on whichever function is current (NewFn if
// one was produced, otherwise F). Returns the result of
// UpgradeIntrinsicFunction1.
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes.  This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

// Global variables currently need no upgrading; always returns false.
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
616 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, 617 Value *Op, unsigned Shift) { 618 Type *ResultTy = Op->getType(); 619 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 620 621 // Bitcast from a 64-bit element type to a byte element type. 622 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 623 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 624 625 // We'll be shuffling in zeroes. 626 Value *Res = Constant::getNullValue(VecTy); 627 628 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 629 // we'll just return the zero vector. 630 if (Shift < 16) { 631 uint32_t Idxs[64]; 632 // 256/512-bit version is split into 2/4 16-byte lanes. 633 for (unsigned l = 0; l != NumElts; l += 16) 634 for (unsigned i = 0; i != 16; ++i) { 635 unsigned Idx = NumElts + i - Shift; 636 if (Idx < NumElts) 637 Idx -= NumElts - 16; // end of lane, switch operand. 638 Idxs[l + i] = Idx + l; 639 } 640 641 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); 642 } 643 644 // Bitcast back to a 64-bit element type. 645 return Builder.CreateBitCast(Res, ResultTy, "cast"); 646 } 647 648 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them 649 // to byte shuffles. 650 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, 651 unsigned Shift) { 652 Type *ResultTy = Op->getType(); 653 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 654 655 // Bitcast from a 64-bit element type to a byte element type. 656 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 657 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 658 659 // We'll be shuffling in zeroes. 660 Value *Res = Constant::getNullValue(VecTy); 661 662 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 663 // we'll just return the zero vector. 664 if (Shift < 16) { 665 uint32_t Idxs[64]; 666 // 256/512-bit version is split into 2/4 16-byte lanes. 
667 for (unsigned l = 0; l != NumElts; l += 16) 668 for (unsigned i = 0; i != 16; ++i) { 669 unsigned Idx = i + Shift; 670 if (Idx >= 16) 671 Idx += NumElts - 16; // end of lane, switch operand. 672 Idxs[l + i] = Idx + l; 673 } 674 675 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); 676 } 677 678 // Bitcast back to a 64-bit element type. 679 return Builder.CreateBitCast(Res, ResultTy, "cast"); 680 } 681 682 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, 683 unsigned NumElts) { 684 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), 685 cast<IntegerType>(Mask->getType())->getBitWidth()); 686 Mask = Builder.CreateBitCast(Mask, MaskTy); 687 688 // If we have less than 8 elements, then the starting mask was an i8 and 689 // we need to extract down to the right number of elements. 690 if (NumElts < 8) { 691 uint32_t Indices[4]; 692 for (unsigned i = 0; i != NumElts; ++i) 693 Indices[i] = i; 694 Mask = Builder.CreateShuffleVector(Mask, Mask, 695 makeArrayRef(Indices, NumElts), 696 "extract"); 697 } 698 699 return Mask; 700 } 701 702 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 703 Value *Op0, Value *Op1) { 704 // If the mask is all ones just emit the align operation. 705 if (const auto *C = dyn_cast<Constant>(Mask)) 706 if (C->isAllOnesValue()) 707 return Op0; 708 709 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 710 return Builder.CreateSelect(Mask, Op0, Op1); 711 } 712 713 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. 714 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate 715 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes. 
716 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, 717 Value *Op1, Value *Shift, 718 Value *Passthru, Value *Mask, 719 bool IsVALIGN) { 720 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); 721 722 unsigned NumElts = Op0->getType()->getVectorNumElements(); 723 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!"); 724 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!"); 725 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!"); 726 727 // Mask the immediate for VALIGN. 728 if (IsVALIGN) 729 ShiftVal &= (NumElts - 1); 730 731 // If palignr is shifting the pair of vectors more than the size of two 732 // lanes, emit zero. 733 if (ShiftVal >= 32) 734 return llvm::Constant::getNullValue(Op0->getType()); 735 736 // If palignr is shifting the pair of input vectors more than one lane, 737 // but less than two lanes, convert to shifting in zeroes. 738 if (ShiftVal > 16) { 739 ShiftVal -= 16; 740 Op1 = Op0; 741 Op0 = llvm::Constant::getNullValue(Op0->getType()); 742 } 743 744 uint32_t Indices[64]; 745 // 256-bit palignr operates on 128-bit lanes so we need to handle that 746 for (unsigned l = 0; l < NumElts; l += 16) { 747 for (unsigned i = 0; i != 16; ++i) { 748 unsigned Idx = ShiftVal + i; 749 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN. 750 Idx += NumElts - 16; // End of lane, switch operand. 751 Indices[l + i] = Idx + l; 752 } 753 } 754 755 Value *Align = Builder.CreateShuffleVector(Op1, Op0, 756 makeArrayRef(Indices, NumElts), 757 "palignr"); 758 759 return EmitX86Select(Builder, Mask, Align, Passthru); 760 } 761 762 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, 763 Value *Ptr, Value *Data, Value *Mask, 764 bool Aligned) { 765 // Cast the pointer to the right type. 766 Ptr = Builder.CreateBitCast(Ptr, 767 llvm::PointerType::getUnqual(Data->getType())); 768 unsigned Align = 769 Aligned ? 
cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; 770 771 // If the mask is all ones just emit a regular store. 772 if (const auto *C = dyn_cast<Constant>(Mask)) 773 if (C->isAllOnesValue()) 774 return Builder.CreateAlignedStore(Data, Ptr, Align); 775 776 // Convert the mask from an integer type to a vector of i1. 777 unsigned NumElts = Data->getType()->getVectorNumElements(); 778 Mask = getX86MaskVec(Builder, Mask, NumElts); 779 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); 780 } 781 782 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, 783 Value *Ptr, Value *Passthru, Value *Mask, 784 bool Aligned) { 785 // Cast the pointer to the right type. 786 Ptr = Builder.CreateBitCast(Ptr, 787 llvm::PointerType::getUnqual(Passthru->getType())); 788 unsigned Align = 789 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; 790 791 // If the mask is all ones just emit a regular store. 792 if (const auto *C = dyn_cast<Constant>(Mask)) 793 if (C->isAllOnesValue()) 794 return Builder.CreateAlignedLoad(Ptr, Align); 795 796 // Convert the mask from an integer type to a vector of i1. 
797 unsigned NumElts = Passthru->getType()->getVectorNumElements(); 798 Mask = getX86MaskVec(Builder, Mask, NumElts); 799 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); 800 } 801 802 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { 803 Value *Op0 = CI.getArgOperand(0); 804 llvm::Type *Ty = Op0->getType(); 805 Value *Zero = llvm::Constant::getNullValue(Ty); 806 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); 807 Value *Neg = Builder.CreateNeg(Op0); 808 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); 809 810 if (CI.getNumArgOperands() == 3) 811 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); 812 813 return Res; 814 } 815 816 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, 817 ICmpInst::Predicate Pred) { 818 Value *Op0 = CI.getArgOperand(0); 819 Value *Op1 = CI.getArgOperand(1); 820 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); 821 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1); 822 823 if (CI.getNumArgOperands() == 4) 824 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); 825 826 return Res; 827 } 828 829 // Applying mask on vector of i1's and make sure result is at least 8 bits wide. 
830 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, 831 unsigned NumElts) { 832 const auto *C = dyn_cast<Constant>(Mask); 833 if (!C || !C->isAllOnesValue()) 834 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); 835 836 if (NumElts < 8) { 837 uint32_t Indices[8]; 838 for (unsigned i = 0; i != NumElts; ++i) 839 Indices[i] = i; 840 for (unsigned i = NumElts; i != 8; ++i) 841 Indices[i] = NumElts + i % NumElts; 842 Vec = Builder.CreateShuffleVector(Vec, 843 Constant::getNullValue(Vec->getType()), 844 Indices); 845 } 846 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U))); 847 } 848 849 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, 850 unsigned CC, bool Signed) { 851 Value *Op0 = CI.getArgOperand(0); 852 unsigned NumElts = Op0->getType()->getVectorNumElements(); 853 854 Value *Cmp; 855 if (CC == 3) { 856 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 857 } else if (CC == 7) { 858 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 859 } else { 860 ICmpInst::Predicate Pred; 861 switch (CC) { 862 default: llvm_unreachable("Unknown condition code"); 863 case 0: Pred = ICmpInst::ICMP_EQ; break; 864 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 865 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 866 case 4: Pred = ICmpInst::ICMP_NE; break; 867 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 868 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 869 } 870 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); 871 } 872 873 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); 874 875 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts); 876 } 877 878 // Replace a masked intrinsic with an older unmasked intrinsic. 
879 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, 880 Intrinsic::ID IID) { 881 Function *F = CI.getCalledFunction(); 882 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); 883 Value *Rep = Builder.CreateCall(Intrin, 884 { CI.getArgOperand(0), CI.getArgOperand(1) }); 885 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); 886 } 887 888 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { 889 Value* A = CI.getArgOperand(0); 890 Value* B = CI.getArgOperand(1); 891 Value* Src = CI.getArgOperand(2); 892 Value* Mask = CI.getArgOperand(3); 893 894 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); 895 Value* Cmp = Builder.CreateIsNotNull(AndNode); 896 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); 897 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); 898 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); 899 return Builder.CreateInsertElement(A, Select, (uint64_t)0); 900 } 901 902 903 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { 904 Value* Op = CI.getArgOperand(0); 905 Type* ReturnOp = CI.getType(); 906 unsigned NumElts = CI.getType()->getVectorNumElements(); 907 Value *Mask = getX86MaskVec(Builder, Op, NumElts); 908 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); 909 } 910 911 /// Upgrade a call to an old intrinsic. All argument and return casting must be 912 /// provided to seamlessly integrate with existing context. 913 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 914 Function *F = CI->getCalledFunction(); 915 LLVMContext &C = CI->getContext(); 916 IRBuilder<> Builder(C); 917 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 918 919 assert(F && "Intrinsic call is not direct?"); 920 921 if (!NewFn) { 922 // Get the Function's name. 
923 StringRef Name = F->getName(); 924 925 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); 926 Name = Name.substr(5); 927 928 bool IsX86 = Name.startswith("x86."); 929 if (IsX86) 930 Name = Name.substr(4); 931 bool IsNVVM = Name.startswith("nvvm."); 932 if (IsNVVM) 933 Name = Name.substr(5); 934 935 if (IsX86 && Name.startswith("sse4a.movnt.")) { 936 Module *M = F->getParent(); 937 SmallVector<Metadata *, 1> Elts; 938 Elts.push_back( 939 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 940 MDNode *Node = MDNode::get(C, Elts); 941 942 Value *Arg0 = CI->getArgOperand(0); 943 Value *Arg1 = CI->getArgOperand(1); 944 945 // Nontemporal (unaligned) store of the 0'th element of the float/double 946 // vector. 947 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType(); 948 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy); 949 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast"); 950 Value *Extract = 951 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement"); 952 953 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1); 954 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 955 956 // Remove intrinsic. 957 CI->eraseFromParent(); 958 return; 959 } 960 961 if (IsX86 && (Name.startswith("avx.movnt.") || 962 Name.startswith("avx512.storent."))) { 963 Module *M = F->getParent(); 964 SmallVector<Metadata *, 1> Elts; 965 Elts.push_back( 966 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 967 MDNode *Node = MDNode::get(C, Elts); 968 969 Value *Arg0 = CI->getArgOperand(0); 970 Value *Arg1 = CI->getArgOperand(1); 971 972 // Convert the type of the pointer to a pointer to the stored type. 
973 Value *BC = Builder.CreateBitCast(Arg0, 974 PointerType::getUnqual(Arg1->getType()), 975 "cast"); 976 VectorType *VTy = cast<VectorType>(Arg1->getType()); 977 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 978 VTy->getBitWidth() / 8); 979 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 980 981 // Remove intrinsic. 982 CI->eraseFromParent(); 983 return; 984 } 985 986 if (IsX86 && Name == "sse2.storel.dq") { 987 Value *Arg0 = CI->getArgOperand(0); 988 Value *Arg1 = CI->getArgOperand(1); 989 990 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 991 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 992 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); 993 Value *BC = Builder.CreateBitCast(Arg0, 994 PointerType::getUnqual(Elt->getType()), 995 "cast"); 996 Builder.CreateAlignedStore(Elt, BC, 1); 997 998 // Remove intrinsic. 999 CI->eraseFromParent(); 1000 return; 1001 } 1002 1003 if (IsX86 && (Name.startswith("sse.storeu.") || 1004 Name.startswith("sse2.storeu.") || 1005 Name.startswith("avx.storeu."))) { 1006 Value *Arg0 = CI->getArgOperand(0); 1007 Value *Arg1 = CI->getArgOperand(1); 1008 1009 Arg0 = Builder.CreateBitCast(Arg0, 1010 PointerType::getUnqual(Arg1->getType()), 1011 "cast"); 1012 Builder.CreateAlignedStore(Arg1, Arg0, 1); 1013 1014 // Remove intrinsic. 1015 CI->eraseFromParent(); 1016 return; 1017 } 1018 1019 if (IsX86 && (Name.startswith("avx512.mask.store"))) { 1020 // "avx512.mask.storeu." or "avx512.mask.store." 1021 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". 1022 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1023 CI->getArgOperand(2), Aligned); 1024 1025 // Remove intrinsic. 1026 CI->eraseFromParent(); 1027 return; 1028 } 1029 1030 Value *Rep; 1031 // Upgrade packed integer vector compare intrinsics to compare instructions. 1032 if (IsX86 && (Name.startswith("sse2.pcmp") || 1033 Name.startswith("avx2.pcmp"))) { 1034 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." 
or "avx2.pcmpgt." 1035 bool CmpEq = Name[9] == 'e'; 1036 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, 1037 CI->getArgOperand(0), CI->getArgOperand(1)); 1038 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 1039 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { 1040 Type *ExtTy = Type::getInt32Ty(C); 1041 if (CI->getOperand(0)->getType()->isIntegerTy(8)) 1042 ExtTy = Type::getInt64Ty(C); 1043 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 1044 ExtTy->getPrimitiveSizeInBits(); 1045 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); 1046 Rep = Builder.CreateVectorSplat(NumElts, Rep); 1047 } else if (IsX86 && (Name.startswith("avx512.ptestm") || 1048 Name.startswith("avx512.ptestnm"))) { 1049 Value *Op0 = CI->getArgOperand(0); 1050 Value *Op1 = CI->getArgOperand(1); 1051 Value *Mask = CI->getArgOperand(2); 1052 Rep = Builder.CreateAnd(Op0, Op1); 1053 llvm::Type *Ty = Op0->getType(); 1054 Value *Zero = llvm::Constant::getNullValue(Ty); 1055 ICmpInst::Predicate Pred = 1056 Name.startswith("avx512.ptestm") ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1057 Rep = Builder.CreateICmp(Pred, Rep, Zero); 1058 unsigned NumElts = Op0->getType()->getVectorNumElements(); 1059 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts); 1060 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ 1061 unsigned NumElts = 1062 CI->getArgOperand(1)->getType()->getVectorNumElements(); 1063 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); 1064 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1065 CI->getArgOperand(1)); 1066 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { 1067 uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2; 1068 uint64_t And = (1ULL << Shift) - 1; 1069 Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And); 1070 Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift); 1071 Rep = Builder.CreateOr(LowBits, HighBits); 1072 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { 1073 Type *I32Ty = Type::getInt32Ty(C); 1074 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1075 ConstantInt::get(I32Ty, 0)); 1076 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1077 ConstantInt::get(I32Ty, 0)); 1078 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1079 Builder.CreateFAdd(Elt0, Elt1), 1080 ConstantInt::get(I32Ty, 0)); 1081 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) { 1082 Type *I32Ty = Type::getInt32Ty(C); 1083 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1084 ConstantInt::get(I32Ty, 0)); 1085 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1086 ConstantInt::get(I32Ty, 0)); 1087 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1088 Builder.CreateFSub(Elt0, Elt1), 1089 ConstantInt::get(I32Ty, 0)); 1090 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) { 1091 Type *I32Ty = Type::getInt32Ty(C); 1092 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1093 
ConstantInt::get(I32Ty, 0)); 1094 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1095 ConstantInt::get(I32Ty, 0)); 1096 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1097 Builder.CreateFMul(Elt0, Elt1), 1098 ConstantInt::get(I32Ty, 0)); 1099 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) { 1100 Type *I32Ty = Type::getInt32Ty(C); 1101 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1102 ConstantInt::get(I32Ty, 0)); 1103 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1104 ConstantInt::get(I32Ty, 0)); 1105 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1106 Builder.CreateFDiv(Elt0, Elt1), 1107 ConstantInt::get(I32Ty, 0)); 1108 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { 1109 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." 1110 bool CmpEq = Name[16] == 'e'; 1111 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); 1112 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) { 1113 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1114 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); 1115 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { 1116 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1117 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); 1118 } else if(IsX86 && (Name == "ssse3.pabs.b.128" || 1119 Name == "ssse3.pabs.w.128" || 1120 Name == "ssse3.pabs.d.128" || 1121 Name.startswith("avx2.pabs") || 1122 Name.startswith("avx512.mask.pabs"))) { 1123 Rep = upgradeAbs(Builder, *CI); 1124 } else if (IsX86 && (Name == "sse41.pmaxsb" || 1125 Name == "sse2.pmaxs.w" || 1126 Name == "sse41.pmaxsd" || 1127 Name.startswith("avx2.pmaxs") || 1128 Name.startswith("avx512.mask.pmaxs"))) { 1129 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); 1130 } else if (IsX86 && (Name == "sse2.pmaxu.b" || 1131 Name == "sse41.pmaxuw" || 1132 Name == "sse41.pmaxud" || 1133 Name.startswith("avx2.pmaxu") || 
1134 Name.startswith("avx512.mask.pmaxu"))) { 1135 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT); 1136 } else if (IsX86 && (Name == "sse41.pminsb" || 1137 Name == "sse2.pmins.w" || 1138 Name == "sse41.pminsd" || 1139 Name.startswith("avx2.pmins") || 1140 Name.startswith("avx512.mask.pmins"))) { 1141 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); 1142 } else if (IsX86 && (Name == "sse2.pminu.b" || 1143 Name == "sse41.pminuw" || 1144 Name == "sse41.pminud" || 1145 Name.startswith("avx2.pminu") || 1146 Name.startswith("avx512.mask.pminu"))) { 1147 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); 1148 } else if (IsX86 && (Name == "sse2.cvtdq2pd" || 1149 Name == "sse2.cvtps2pd" || 1150 Name == "avx.cvtdq2.pd.256" || 1151 Name == "avx.cvt.ps2.pd.256" || 1152 Name.startswith("avx512.mask.cvtdq2pd.") || 1153 Name.startswith("avx512.mask.cvtudq2pd."))) { 1154 // Lossless i32/float to double conversion. 1155 // Extract the bottom elements if necessary and convert to double vector. 
1156 Value *Src = CI->getArgOperand(0); 1157 VectorType *SrcTy = cast<VectorType>(Src->getType()); 1158 VectorType *DstTy = cast<VectorType>(CI->getType()); 1159 Rep = CI->getArgOperand(0); 1160 1161 unsigned NumDstElts = DstTy->getNumElements(); 1162 if (NumDstElts < SrcTy->getNumElements()) { 1163 assert(NumDstElts == 2 && "Unexpected vector size"); 1164 uint32_t ShuffleMask[2] = { 0, 1 }; 1165 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), 1166 ShuffleMask); 1167 } 1168 1169 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2")); 1170 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2")); 1171 if (SInt2Double) 1172 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); 1173 else if (UInt2Double) 1174 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd"); 1175 else 1176 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 1177 1178 if (CI->getNumArgOperands() == 3) 1179 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1180 CI->getArgOperand(1)); 1181 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { 1182 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 1183 CI->getArgOperand(1), CI->getArgOperand(2), 1184 /*Aligned*/false); 1185 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { 1186 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 1187 CI->getArgOperand(1),CI->getArgOperand(2), 1188 /*Aligned*/true); 1189 } else if (IsX86 && Name.startswith("xop.vpcom")) { 1190 Intrinsic::ID intID; 1191 if (Name.endswith("ub")) 1192 intID = Intrinsic::x86_xop_vpcomub; 1193 else if (Name.endswith("uw")) 1194 intID = Intrinsic::x86_xop_vpcomuw; 1195 else if (Name.endswith("ud")) 1196 intID = Intrinsic::x86_xop_vpcomud; 1197 else if (Name.endswith("uq")) 1198 intID = Intrinsic::x86_xop_vpcomuq; 1199 else if (Name.endswith("b")) 1200 intID = Intrinsic::x86_xop_vpcomb; 1201 else if (Name.endswith("w")) 1202 intID = Intrinsic::x86_xop_vpcomw; 1203 else if (Name.endswith("d")) 1204 intID = Intrinsic::x86_xop_vpcomd; 1205 
else if (Name.endswith("q")) 1206 intID = Intrinsic::x86_xop_vpcomq; 1207 else 1208 llvm_unreachable("Unknown suffix"); 1209 1210 Name = Name.substr(9); // strip off "xop.vpcom" 1211 unsigned Imm; 1212 if (Name.startswith("lt")) 1213 Imm = 0; 1214 else if (Name.startswith("le")) 1215 Imm = 1; 1216 else if (Name.startswith("gt")) 1217 Imm = 2; 1218 else if (Name.startswith("ge")) 1219 Imm = 3; 1220 else if (Name.startswith("eq")) 1221 Imm = 4; 1222 else if (Name.startswith("ne")) 1223 Imm = 5; 1224 else if (Name.startswith("false")) 1225 Imm = 6; 1226 else if (Name.startswith("true")) 1227 Imm = 7; 1228 else 1229 llvm_unreachable("Unknown condition"); 1230 1231 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 1232 Rep = 1233 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 1234 Builder.getInt8(Imm)}); 1235 } else if (IsX86 && Name.startswith("xop.vpcmov")) { 1236 Value *Sel = CI->getArgOperand(2); 1237 Value *NotSel = Builder.CreateNot(Sel); 1238 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); 1239 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); 1240 Rep = Builder.CreateOr(Sel0, Sel1); 1241 } else if (IsX86 && Name == "sse42.crc32.64.8") { 1242 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 1243 Intrinsic::x86_sse42_crc32_32_8); 1244 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 1245 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 1246 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 1247 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) { 1248 // Replace broadcasts with a series of insertelements. 
1249 Type *VecTy = CI->getType(); 1250 Type *EltTy = VecTy->getVectorElementType(); 1251 unsigned EltNum = VecTy->getVectorNumElements(); 1252 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 1253 EltTy->getPointerTo()); 1254 Value *Load = Builder.CreateLoad(EltTy, Cast); 1255 Type *I32Ty = Type::getInt32Ty(C); 1256 Rep = UndefValue::get(VecTy); 1257 for (unsigned I = 0; I < EltNum; ++I) 1258 Rep = Builder.CreateInsertElement(Rep, Load, 1259 ConstantInt::get(I32Ty, I)); 1260 } else if (IsX86 && (Name.startswith("sse41.pmovsx") || 1261 Name.startswith("sse41.pmovzx") || 1262 Name.startswith("avx2.pmovsx") || 1263 Name.startswith("avx2.pmovzx") || 1264 Name.startswith("avx512.mask.pmovsx") || 1265 Name.startswith("avx512.mask.pmovzx"))) { 1266 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 1267 VectorType *DstTy = cast<VectorType>(CI->getType()); 1268 unsigned NumDstElts = DstTy->getNumElements(); 1269 1270 // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 1271 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 1272 for (unsigned i = 0; i != NumDstElts; ++i) 1273 ShuffleMask[i] = i; 1274 1275 Value *SV = Builder.CreateShuffleVector( 1276 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 1277 1278 bool DoSext = (StringRef::npos != Name.find("pmovsx")); 1279 Rep = DoSext ? Builder.CreateSExt(SV, DstTy) 1280 : Builder.CreateZExt(SV, DstTy); 1281 // If there are 3 arguments, it's a masked intrinsic so we need a select. 1282 if (CI->getNumArgOperands() == 3) 1283 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1284 CI->getArgOperand(1)); 1285 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || 1286 Name == "avx2.vbroadcasti128")) { 1287 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 
1288 Type *EltTy = CI->getType()->getVectorElementType(); 1289 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); 1290 Type *VT = VectorType::get(EltTy, NumSrcElts); 1291 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 1292 PointerType::getUnqual(VT)); 1293 Value *Load = Builder.CreateAlignedLoad(Op, 1); 1294 if (NumSrcElts == 2) 1295 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 1296 { 0, 1, 0, 1 }); 1297 else 1298 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 1299 { 0, 1, 2, 3, 0, 1, 2, 3 }); 1300 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") || 1301 Name.startswith("avx512.mask.shuf.f"))) { 1302 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1303 Type *VT = CI->getType(); 1304 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; 1305 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); 1306 unsigned ControlBitsMask = NumLanes - 1; 1307 unsigned NumControlBits = NumLanes / 2; 1308 SmallVector<uint32_t, 8> ShuffleMask(0); 1309 1310 for (unsigned l = 0; l != NumLanes; ++l) { 1311 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; 1312 // We actually need the other source. 
1313 if (l >= NumLanes / 2) 1314 LaneMask += NumLanes; 1315 for (unsigned i = 0; i != NumElementsInLane; ++i) 1316 ShuffleMask.push_back(LaneMask * NumElementsInLane + i); 1317 } 1318 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 1319 CI->getArgOperand(1), ShuffleMask); 1320 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 1321 CI->getArgOperand(3)); 1322 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || 1323 Name.startswith("avx512.mask.broadcasti"))) { 1324 unsigned NumSrcElts = 1325 CI->getArgOperand(0)->getType()->getVectorNumElements(); 1326 unsigned NumDstElts = CI->getType()->getVectorNumElements(); 1327 1328 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 1329 for (unsigned i = 0; i != NumDstElts; ++i) 1330 ShuffleMask[i] = i % NumSrcElts; 1331 1332 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 1333 CI->getArgOperand(0), 1334 ShuffleMask); 1335 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1336 CI->getArgOperand(1)); 1337 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") || 1338 Name.startswith("avx2.vbroadcast") || 1339 Name.startswith("avx512.pbroadcast") || 1340 Name.startswith("avx512.mask.broadcast.s"))) { 1341 // Replace vp?broadcasts with a vector shuffle. 
1342 Value *Op = CI->getArgOperand(0); 1343 unsigned NumElts = CI->getType()->getVectorNumElements(); 1344 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 1345 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 1346 Constant::getNullValue(MaskTy)); 1347 1348 if (CI->getNumArgOperands() == 3) 1349 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1350 CI->getArgOperand(1)); 1351 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { 1352 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 1353 CI->getArgOperand(1), 1354 CI->getArgOperand(2), 1355 CI->getArgOperand(3), 1356 CI->getArgOperand(4), 1357 false); 1358 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) { 1359 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 1360 CI->getArgOperand(1), 1361 CI->getArgOperand(2), 1362 CI->getArgOperand(3), 1363 CI->getArgOperand(4), 1364 true); 1365 } else if (IsX86 && (Name == "sse2.psll.dq" || 1366 Name == "avx2.psll.dq")) { 1367 // 128/256-bit shift left specified in bits. 1368 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1369 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), 1370 Shift / 8); // Shift is in bits. 1371 } else if (IsX86 && (Name == "sse2.psrl.dq" || 1372 Name == "avx2.psrl.dq")) { 1373 // 128/256-bit shift right specified in bits. 1374 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1375 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), 1376 Shift / 8); // Shift is in bits. 1377 } else if (IsX86 && (Name == "sse2.psll.dq.bs" || 1378 Name == "avx2.psll.dq.bs" || 1379 Name == "avx512.psll.dq.512")) { 1380 // 128/256/512-bit shift left specified in bytes. 
1381 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1382 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 1383 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" || 1384 Name == "avx2.psrl.dq.bs" || 1385 Name == "avx512.psrl.dq.512")) { 1386 // 128/256/512-bit shift right specified in bytes. 1387 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1388 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 1389 } else if (IsX86 && (Name == "sse41.pblendw" || 1390 Name.startswith("sse41.blendp") || 1391 Name.startswith("avx.blend.p") || 1392 Name == "avx2.pblendw" || 1393 Name.startswith("avx2.pblendd."))) { 1394 Value *Op0 = CI->getArgOperand(0); 1395 Value *Op1 = CI->getArgOperand(1); 1396 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1397 VectorType *VecTy = cast<VectorType>(CI->getType()); 1398 unsigned NumElts = VecTy->getNumElements(); 1399 1400 SmallVector<uint32_t, 16> Idxs(NumElts); 1401 for (unsigned i = 0; i != NumElts; ++i) 1402 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 1403 1404 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1405 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || 1406 Name == "avx2.vinserti128" || 1407 Name.startswith("avx512.mask.insert"))) { 1408 Value *Op0 = CI->getArgOperand(0); 1409 Value *Op1 = CI->getArgOperand(1); 1410 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1411 unsigned DstNumElts = CI->getType()->getVectorNumElements(); 1412 unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); 1413 unsigned Scale = DstNumElts / SrcNumElts; 1414 1415 // Mask off the high bits of the immediate value; hardware ignores those. 1416 Imm = Imm % Scale; 1417 1418 // Extend the second operand into a vector the size of the destination. 
1419 Value *UndefV = UndefValue::get(Op1->getType()); 1420 SmallVector<uint32_t, 8> Idxs(DstNumElts); 1421 for (unsigned i = 0; i != SrcNumElts; ++i) 1422 Idxs[i] = i; 1423 for (unsigned i = SrcNumElts; i != DstNumElts; ++i) 1424 Idxs[i] = SrcNumElts; 1425 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); 1426 1427 // Insert the second operand into the first operand. 1428 1429 // Note that there is no guarantee that instruction lowering will actually 1430 // produce a vinsertf128 instruction for the created shuffles. In 1431 // particular, the 0 immediate case involves no lane changes, so it can 1432 // be handled as a blend. 1433 1434 // Example of shuffle mask for 32-bit elements: 1435 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1436 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 1437 1438 // First fill with identify mask. 1439 for (unsigned i = 0; i != DstNumElts; ++i) 1440 Idxs[i] = i; 1441 // Then replace the elements where we need to insert. 1442 for (unsigned i = 0; i != SrcNumElts; ++i) 1443 Idxs[i + Imm * SrcNumElts] = i + DstNumElts; 1444 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); 1445 1446 // If the intrinsic has a mask operand, handle that. 1447 if (CI->getNumArgOperands() == 5) 1448 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 1449 CI->getArgOperand(3)); 1450 } else if (IsX86 && (Name.startswith("avx.vextractf128.") || 1451 Name == "avx2.vextracti128" || 1452 Name.startswith("avx512.mask.vextract"))) { 1453 Value *Op0 = CI->getArgOperand(0); 1454 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1455 unsigned DstNumElts = CI->getType()->getVectorNumElements(); 1456 unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); 1457 unsigned Scale = SrcNumElts / DstNumElts; 1458 1459 // Mask off the high bits of the immediate value; hardware ignores those. 1460 Imm = Imm % Scale; 1461 1462 // Get indexes for the subvector of the input vector. 
1463 SmallVector<uint32_t, 8> Idxs(DstNumElts); 1464 for (unsigned i = 0; i != DstNumElts; ++i) { 1465 Idxs[i] = i + (Imm * DstNumElts); 1466 } 1467 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1468 1469 // If the intrinsic has a mask operand, handle that. 1470 if (CI->getNumArgOperands() == 4) 1471 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1472 CI->getArgOperand(2)); 1473 } else if (!IsX86 && Name == "stackprotectorcheck") { 1474 Rep = nullptr; 1475 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || 1476 Name.startswith("avx512.mask.perm.di."))) { 1477 Value *Op0 = CI->getArgOperand(0); 1478 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1479 VectorType *VecTy = cast<VectorType>(CI->getType()); 1480 unsigned NumElts = VecTy->getNumElements(); 1481 1482 SmallVector<uint32_t, 8> Idxs(NumElts); 1483 for (unsigned i = 0; i != NumElts; ++i) 1484 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); 1485 1486 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1487 1488 if (CI->getNumArgOperands() == 4) 1489 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1490 CI->getArgOperand(2)); 1491 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") || 1492 Name == "avx2.vperm2i128")) { 1493 // The immediate permute control byte looks like this: 1494 // [1:0] - select 128 bits from sources for low half of destination 1495 // [2] - ignore 1496 // [3] - zero low half of destination 1497 // [5:4] - select 128 bits from sources for high half of destination 1498 // [6] - ignore 1499 // [7] - zero high half of destination 1500 1501 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1502 1503 unsigned NumElts = CI->getType()->getVectorNumElements(); 1504 unsigned HalfSize = NumElts / 2; 1505 SmallVector<uint32_t, 8> ShuffleMask(NumElts); 1506 1507 // Determine which operand(s) are actually in use for this instruction. 1508 Value *V0 = (Imm & 0x02) ? 
CI->getArgOperand(1) : CI->getArgOperand(0); 1509 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0); 1510 1511 // If needed, replace operands based on zero mask. 1512 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0; 1513 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1; 1514 1515 // Permute low half of result. 1516 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; 1517 for (unsigned i = 0; i < HalfSize; ++i) 1518 ShuffleMask[i] = StartIndex + i; 1519 1520 // Permute high half of result. 1521 StartIndex = (Imm & 0x10) ? HalfSize : 0; 1522 for (unsigned i = 0; i < HalfSize; ++i) 1523 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; 1524 1525 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 1526 1527 } else if (IsX86 && (Name.startswith("avx.vpermil.") || 1528 Name == "sse2.pshuf.d" || 1529 Name.startswith("avx512.mask.vpermil.p") || 1530 Name.startswith("avx512.mask.pshuf.d."))) { 1531 Value *Op0 = CI->getArgOperand(0); 1532 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1533 VectorType *VecTy = cast<VectorType>(CI->getType()); 1534 unsigned NumElts = VecTy->getNumElements(); 1535 // Calculate the size of each index in the immediate. 1536 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 1537 unsigned IdxMask = ((1 << IdxSize) - 1); 1538 1539 SmallVector<uint32_t, 8> Idxs(NumElts); 1540 // Lookup the bits for this element, wrapping around the immediate every 1541 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need 1542 // to offset by the first index of each group. 
1543 for (unsigned i = 0; i != NumElts; ++i) 1544 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 1545 1546 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1547 1548 if (CI->getNumArgOperands() == 4) 1549 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1550 CI->getArgOperand(2)); 1551 } else if (IsX86 && (Name == "sse2.pshufl.w" || 1552 Name.startswith("avx512.mask.pshufl.w."))) { 1553 Value *Op0 = CI->getArgOperand(0); 1554 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1555 unsigned NumElts = CI->getType()->getVectorNumElements(); 1556 1557 SmallVector<uint32_t, 16> Idxs(NumElts); 1558 for (unsigned l = 0; l != NumElts; l += 8) { 1559 for (unsigned i = 0; i != 4; ++i) 1560 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 1561 for (unsigned i = 4; i != 8; ++i) 1562 Idxs[i + l] = i + l; 1563 } 1564 1565 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1566 1567 if (CI->getNumArgOperands() == 4) 1568 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1569 CI->getArgOperand(2)); 1570 } else if (IsX86 && (Name == "sse2.pshufh.w" || 1571 Name.startswith("avx512.mask.pshufh.w."))) { 1572 Value *Op0 = CI->getArgOperand(0); 1573 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 1574 unsigned NumElts = CI->getType()->getVectorNumElements(); 1575 1576 SmallVector<uint32_t, 16> Idxs(NumElts); 1577 for (unsigned l = 0; l != NumElts; l += 8) { 1578 for (unsigned i = 0; i != 4; ++i) 1579 Idxs[i + l] = i + l; 1580 for (unsigned i = 0; i != 4; ++i) 1581 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 1582 } 1583 1584 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1585 1586 if (CI->getNumArgOperands() == 4) 1587 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1588 CI->getArgOperand(2)); 1589 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) { 1590 Value *Op0 = CI->getArgOperand(0); 1591 Value *Op1 = CI->getArgOperand(1); 1592 unsigned Imm = 
cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1593 unsigned NumElts = CI->getType()->getVectorNumElements(); 1594 1595 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1596 unsigned HalfLaneElts = NumLaneElts / 2; 1597 1598 SmallVector<uint32_t, 16> Idxs(NumElts); 1599 for (unsigned i = 0; i != NumElts; ++i) { 1600 // Base index is the starting element of the lane. 1601 Idxs[i] = i - (i % NumLaneElts); 1602 // If we are half way through the lane switch to the other source. 1603 if ((i % NumLaneElts) >= HalfLaneElts) 1604 Idxs[i] += NumElts; 1605 // Now select the specific element. By adding HalfLaneElts bits from 1606 // the immediate. Wrapping around the immediate every 8-bits. 1607 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); 1608 } 1609 1610 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1611 1612 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 1613 CI->getArgOperand(3)); 1614 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") || 1615 Name.startswith("avx512.mask.movshdup") || 1616 Name.startswith("avx512.mask.movsldup"))) { 1617 Value *Op0 = CI->getArgOperand(0); 1618 unsigned NumElts = CI->getType()->getVectorNumElements(); 1619 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1620 1621 unsigned Offset = 0; 1622 if (Name.startswith("avx512.mask.movshdup.")) 1623 Offset = 1; 1624 1625 SmallVector<uint32_t, 16> Idxs(NumElts); 1626 for (unsigned l = 0; l != NumElts; l += NumLaneElts) 1627 for (unsigned i = 0; i != NumLaneElts; i += 2) { 1628 Idxs[i + l + 0] = i + l + Offset; 1629 Idxs[i + l + 1] = i + l + Offset; 1630 } 1631 1632 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 1633 1634 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1635 CI->getArgOperand(1)); 1636 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") || 1637 Name.startswith("avx512.mask.unpckl."))) { 1638 Value *Op0 = CI->getArgOperand(0); 1639 Value *Op1 = CI->getArgOperand(1); 1640 
int NumElts = CI->getType()->getVectorNumElements(); 1641 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1642 1643 SmallVector<uint32_t, 64> Idxs(NumElts); 1644 for (int l = 0; l != NumElts; l += NumLaneElts) 1645 for (int i = 0; i != NumLaneElts; ++i) 1646 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 1647 1648 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1649 1650 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1651 CI->getArgOperand(2)); 1652 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") || 1653 Name.startswith("avx512.mask.unpckh."))) { 1654 Value *Op0 = CI->getArgOperand(0); 1655 Value *Op1 = CI->getArgOperand(1); 1656 int NumElts = CI->getType()->getVectorNumElements(); 1657 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 1658 1659 SmallVector<uint32_t, 64> Idxs(NumElts); 1660 for (int l = 0; l != NumElts; l += NumLaneElts) 1661 for (int i = 0; i != NumLaneElts; ++i) 1662 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 1663 1664 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 1665 1666 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1667 CI->getArgOperand(2)); 1668 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { 1669 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); 1670 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1671 CI->getArgOperand(2)); 1672 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) { 1673 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)), 1674 CI->getArgOperand(1)); 1675 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1676 CI->getArgOperand(2)); 1677 } else if (IsX86 && Name.startswith("avx512.mask.por.")) { 1678 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1)); 1679 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1680 CI->getArgOperand(2)); 1681 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) { 1682 Rep = Builder.CreateXor(CI->getArgOperand(0), 
CI->getArgOperand(1)); 1683 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1684 CI->getArgOperand(2)); 1685 } else if (IsX86 && Name.startswith("avx512.mask.and.")) { 1686 VectorType *FTy = cast<VectorType>(CI->getType()); 1687 VectorType *ITy = VectorType::getInteger(FTy); 1688 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1689 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1690 Rep = Builder.CreateBitCast(Rep, FTy); 1691 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1692 CI->getArgOperand(2)); 1693 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { 1694 VectorType *FTy = cast<VectorType>(CI->getType()); 1695 VectorType *ITy = VectorType::getInteger(FTy); 1696 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 1697 Rep = Builder.CreateAnd(Rep, 1698 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1699 Rep = Builder.CreateBitCast(Rep, FTy); 1700 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1701 CI->getArgOperand(2)); 1702 } else if (IsX86 && Name.startswith("avx512.mask.or.")) { 1703 VectorType *FTy = cast<VectorType>(CI->getType()); 1704 VectorType *ITy = VectorType::getInteger(FTy); 1705 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1706 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1707 Rep = Builder.CreateBitCast(Rep, FTy); 1708 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1709 CI->getArgOperand(2)); 1710 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { 1711 VectorType *FTy = cast<VectorType>(CI->getType()); 1712 VectorType *ITy = VectorType::getInteger(FTy); 1713 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 1714 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 1715 Rep = Builder.CreateBitCast(Rep, FTy); 1716 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1717 CI->getArgOperand(2)); 1718 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) { 1719 Rep = 
Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 1720 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1721 CI->getArgOperand(2)); 1722 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) { 1723 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); 1724 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1725 CI->getArgOperand(2)); 1726 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { 1727 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 1728 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1729 CI->getArgOperand(2)); 1730 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) { 1731 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 1732 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1733 CI->getArgOperand(2)); 1734 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { 1735 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 1736 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1737 CI->getArgOperand(2)); 1738 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { 1739 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 1740 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1741 CI->getArgOperand(2)); 1742 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { 1743 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 1744 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1745 CI->getArgOperand(2)); 1746 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { 1747 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1748 Intrinsic::ctlz, 1749 CI->getType()), 1750 { CI->getArgOperand(0), Builder.getInt1(false) }); 1751 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1752 CI->getArgOperand(1)); 1753 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || 1754 Name.startswith("avx512.mask.min.p"))) { 1755 bool IsMin = Name[13] == 'i'; 
1756 VectorType *VecTy = cast<VectorType>(CI->getType()); 1757 unsigned VecWidth = VecTy->getPrimitiveSizeInBits(); 1758 unsigned EltWidth = VecTy->getScalarSizeInBits(); 1759 Intrinsic::ID IID; 1760 if (!IsMin && VecWidth == 128 && EltWidth == 32) 1761 IID = Intrinsic::x86_sse_max_ps; 1762 else if (!IsMin && VecWidth == 128 && EltWidth == 64) 1763 IID = Intrinsic::x86_sse2_max_pd; 1764 else if (!IsMin && VecWidth == 256 && EltWidth == 32) 1765 IID = Intrinsic::x86_avx_max_ps_256; 1766 else if (!IsMin && VecWidth == 256 && EltWidth == 64) 1767 IID = Intrinsic::x86_avx_max_pd_256; 1768 else if (IsMin && VecWidth == 128 && EltWidth == 32) 1769 IID = Intrinsic::x86_sse_min_ps; 1770 else if (IsMin && VecWidth == 128 && EltWidth == 64) 1771 IID = Intrinsic::x86_sse2_min_pd; 1772 else if (IsMin && VecWidth == 256 && EltWidth == 32) 1773 IID = Intrinsic::x86_avx_min_ps_256; 1774 else if (IsMin && VecWidth == 256 && EltWidth == 64) 1775 IID = Intrinsic::x86_avx_min_pd_256; 1776 else 1777 llvm_unreachable("Unexpected intrinsic"); 1778 1779 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1780 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1781 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1782 CI->getArgOperand(2)); 1783 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) { 1784 VectorType *VecTy = cast<VectorType>(CI->getType()); 1785 Intrinsic::ID IID; 1786 if (VecTy->getPrimitiveSizeInBits() == 128) 1787 IID = Intrinsic::x86_ssse3_pshuf_b_128; 1788 else if (VecTy->getPrimitiveSizeInBits() == 256) 1789 IID = Intrinsic::x86_avx2_pshuf_b; 1790 else if (VecTy->getPrimitiveSizeInBits() == 512) 1791 IID = Intrinsic::x86_avx512_pshuf_b_512; 1792 else 1793 llvm_unreachable("Unexpected intrinsic"); 1794 1795 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1796 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1797 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1798 CI->getArgOperand(2)); 1799 } else if 
(IsX86 && (Name.startswith("avx512.mask.pmul.dq.") || 1800 Name.startswith("avx512.mask.pmulu.dq."))) { 1801 bool IsUnsigned = Name[16] == 'u'; 1802 VectorType *VecTy = cast<VectorType>(CI->getType()); 1803 Intrinsic::ID IID; 1804 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) 1805 IID = Intrinsic::x86_sse41_pmuldq; 1806 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) 1807 IID = Intrinsic::x86_avx2_pmul_dq; 1808 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) 1809 IID = Intrinsic::x86_avx512_pmul_dq_512; 1810 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) 1811 IID = Intrinsic::x86_sse2_pmulu_dq; 1812 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) 1813 IID = Intrinsic::x86_avx2_pmulu_dq; 1814 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) 1815 IID = Intrinsic::x86_avx512_pmulu_dq_512; 1816 else 1817 llvm_unreachable("Unexpected intrinsic"); 1818 1819 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1820 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1821 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1822 CI->getArgOperand(2)); 1823 } else if (IsX86 && Name.startswith("avx512.mask.pack")) { 1824 bool IsUnsigned = Name[16] == 'u'; 1825 bool IsDW = Name[18] == 'd'; 1826 VectorType *VecTy = cast<VectorType>(CI->getType()); 1827 Intrinsic::ID IID; 1828 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1829 IID = Intrinsic::x86_sse2_packsswb_128; 1830 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1831 IID = Intrinsic::x86_avx2_packsswb; 1832 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1833 IID = Intrinsic::x86_avx512_packsswb_512; 1834 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1835 IID = Intrinsic::x86_sse2_packssdw_128; 1836 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1837 IID = Intrinsic::x86_avx2_packssdw; 1838 
else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1839 IID = Intrinsic::x86_avx512_packssdw_512; 1840 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1841 IID = Intrinsic::x86_sse2_packuswb_128; 1842 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1843 IID = Intrinsic::x86_avx2_packuswb; 1844 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1845 IID = Intrinsic::x86_avx512_packuswb_512; 1846 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) 1847 IID = Intrinsic::x86_sse41_packusdw; 1848 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) 1849 IID = Intrinsic::x86_avx2_packusdw; 1850 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) 1851 IID = Intrinsic::x86_avx512_packusdw_512; 1852 else 1853 llvm_unreachable("Unexpected intrinsic"); 1854 1855 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1856 { CI->getArgOperand(0), CI->getArgOperand(1) }); 1857 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 1858 CI->getArgOperand(2)); 1859 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { 1860 bool IsImmediate = Name[16] == 'i' || 1861 (Name.size() > 18 && Name[18] == 'i'); 1862 bool IsVariable = Name[16] == 'v'; 1863 char Size = Name[16] == '.' ? Name[17] : 1864 Name[17] == '.' ? Name[18] : 1865 Name[18] == '.' ? 
Name[19] : 1866 Name[20]; 1867 1868 Intrinsic::ID IID; 1869 if (IsVariable && Name[17] != '.') { 1870 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di 1871 IID = Intrinsic::x86_avx2_psllv_q; 1872 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di 1873 IID = Intrinsic::x86_avx2_psllv_q_256; 1874 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si 1875 IID = Intrinsic::x86_avx2_psllv_d; 1876 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si 1877 IID = Intrinsic::x86_avx2_psllv_d_256; 1878 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi 1879 IID = Intrinsic::x86_avx512_psllv_w_128; 1880 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi 1881 IID = Intrinsic::x86_avx512_psllv_w_256; 1882 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi 1883 IID = Intrinsic::x86_avx512_psllv_w_512; 1884 else 1885 llvm_unreachable("Unexpected size"); 1886 } else if (Name.endswith(".128")) { 1887 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 1888 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d 1889 : Intrinsic::x86_sse2_psll_d; 1890 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 1891 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q 1892 : Intrinsic::x86_sse2_psll_q; 1893 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 1894 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w 1895 : Intrinsic::x86_sse2_psll_w; 1896 else 1897 llvm_unreachable("Unexpected size"); 1898 } else if (Name.endswith(".256")) { 1899 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 1900 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d 1901 : Intrinsic::x86_avx2_psll_d; 1902 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 1903 IID = IsImmediate ? 
Intrinsic::x86_avx2_pslli_q 1904 : Intrinsic::x86_avx2_psll_q; 1905 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 1906 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w 1907 : Intrinsic::x86_avx2_psll_w; 1908 else 1909 llvm_unreachable("Unexpected size"); 1910 } else { 1911 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 1912 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 : 1913 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 : 1914 Intrinsic::x86_avx512_psll_d_512; 1915 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 1916 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 : 1917 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 : 1918 Intrinsic::x86_avx512_psll_q_512; 1919 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w 1920 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 1921 : Intrinsic::x86_avx512_psll_w_512; 1922 else 1923 llvm_unreachable("Unexpected size"); 1924 } 1925 1926 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 1927 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) { 1928 bool IsImmediate = Name[16] == 'i' || 1929 (Name.size() > 18 && Name[18] == 'i'); 1930 bool IsVariable = Name[16] == 'v'; 1931 char Size = Name[16] == '.' ? Name[17] : 1932 Name[17] == '.' ? Name[18] : 1933 Name[18] == '.' ? 
Name[19] : 1934 Name[20]; 1935 1936 Intrinsic::ID IID; 1937 if (IsVariable && Name[17] != '.') { 1938 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di 1939 IID = Intrinsic::x86_avx2_psrlv_q; 1940 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di 1941 IID = Intrinsic::x86_avx2_psrlv_q_256; 1942 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si 1943 IID = Intrinsic::x86_avx2_psrlv_d; 1944 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si 1945 IID = Intrinsic::x86_avx2_psrlv_d_256; 1946 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi 1947 IID = Intrinsic::x86_avx512_psrlv_w_128; 1948 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi 1949 IID = Intrinsic::x86_avx512_psrlv_w_256; 1950 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi 1951 IID = Intrinsic::x86_avx512_psrlv_w_512; 1952 else 1953 llvm_unreachable("Unexpected size"); 1954 } else if (Name.endswith(".128")) { 1955 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 1956 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d 1957 : Intrinsic::x86_sse2_psrl_d; 1958 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 1959 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q 1960 : Intrinsic::x86_sse2_psrl_q; 1961 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 1962 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w 1963 : Intrinsic::x86_sse2_psrl_w; 1964 else 1965 llvm_unreachable("Unexpected size"); 1966 } else if (Name.endswith(".256")) { 1967 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 1968 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d 1969 : Intrinsic::x86_avx2_psrl_d; 1970 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 1971 IID = IsImmediate ? 
Intrinsic::x86_avx2_psrli_q 1972 : Intrinsic::x86_avx2_psrl_q; 1973 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 1974 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w 1975 : Intrinsic::x86_avx2_psrl_w; 1976 else 1977 llvm_unreachable("Unexpected size"); 1978 } else { 1979 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 1980 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 : 1981 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 : 1982 Intrinsic::x86_avx512_psrl_d_512; 1983 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 1984 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 : 1985 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 : 1986 Intrinsic::x86_avx512_psrl_q_512; 1987 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) 1988 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 1989 : Intrinsic::x86_avx512_psrl_w_512; 1990 else 1991 llvm_unreachable("Unexpected size"); 1992 } 1993 1994 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 1995 } else if (IsX86 && Name.startswith("avx512.mask.psra")) { 1996 bool IsImmediate = Name[16] == 'i' || 1997 (Name.size() > 18 && Name[18] == 'i'); 1998 bool IsVariable = Name[16] == 'v'; 1999 char Size = Name[16] == '.' ? Name[17] : 2000 Name[17] == '.' ? Name[18] : 2001 Name[18] == '.' ? 
Name[19] : 2002 Name[20]; 2003 2004 Intrinsic::ID IID; 2005 if (IsVariable && Name[17] != '.') { 2006 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si 2007 IID = Intrinsic::x86_avx2_psrav_d; 2008 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si 2009 IID = Intrinsic::x86_avx2_psrav_d_256; 2010 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi 2011 IID = Intrinsic::x86_avx512_psrav_w_128; 2012 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi 2013 IID = Intrinsic::x86_avx512_psrav_w_256; 2014 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi 2015 IID = Intrinsic::x86_avx512_psrav_w_512; 2016 else 2017 llvm_unreachable("Unexpected size"); 2018 } else if (Name.endswith(".128")) { 2019 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 2020 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d 2021 : Intrinsic::x86_sse2_psra_d; 2022 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 2023 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 : 2024 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 : 2025 Intrinsic::x86_avx512_psra_q_128; 2026 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 2027 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w 2028 : Intrinsic::x86_sse2_psra_w; 2029 else 2030 llvm_unreachable("Unexpected size"); 2031 } else if (Name.endswith(".256")) { 2032 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 2033 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d 2034 : Intrinsic::x86_avx2_psra_d; 2035 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 2036 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 : 2037 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 : 2038 Intrinsic::x86_avx512_psra_q_256; 2039 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 2040 IID = IsImmediate ? 
Intrinsic::x86_avx2_psrai_w 2041 : Intrinsic::x86_avx2_psra_w; 2042 else 2043 llvm_unreachable("Unexpected size"); 2044 } else { 2045 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 2046 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 : 2047 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 : 2048 Intrinsic::x86_avx512_psra_d_512; 2049 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q 2050 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 : 2051 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 : 2052 Intrinsic::x86_avx512_psra_q_512; 2053 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w 2054 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 2055 : Intrinsic::x86_avx512_psra_w_512; 2056 else 2057 llvm_unreachable("Unexpected size"); 2058 } 2059 2060 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2061 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { 2062 Rep = upgradeMaskedMove(Builder, *CI); 2063 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { 2064 Rep = UpgradeMaskToInt(Builder, *CI); 2065 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) { 2066 Intrinsic::ID IID; 2067 if (Name.endswith("ps.128")) 2068 IID = Intrinsic::x86_avx_vpermilvar_ps; 2069 else if (Name.endswith("pd.128")) 2070 IID = Intrinsic::x86_avx_vpermilvar_pd; 2071 else if (Name.endswith("ps.256")) 2072 IID = Intrinsic::x86_avx_vpermilvar_ps_256; 2073 else if (Name.endswith("pd.256")) 2074 IID = Intrinsic::x86_avx_vpermilvar_pd_256; 2075 else if (Name.endswith("ps.512")) 2076 IID = Intrinsic::x86_avx512_vpermilvar_ps_512; 2077 else if (Name.endswith("pd.512")) 2078 IID = Intrinsic::x86_avx512_vpermilvar_pd_512; 2079 else 2080 llvm_unreachable("Unexpected vpermilvar intrinsic"); 2081 2082 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); 2083 Rep = Builder.CreateCall(Intrin, 2084 { CI->getArgOperand(0), CI->getArgOperand(1) }); 2085 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2086 CI->getArgOperand(2)); 
2087 } else if (IsX86 && Name.endswith(".movntdqa")) { 2088 Module *M = F->getParent(); 2089 MDNode *Node = MDNode::get( 2090 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 2091 2092 Value *Ptr = CI->getArgOperand(0); 2093 VectorType *VTy = cast<VectorType>(CI->getType()); 2094 2095 // Convert the type of the pointer to a pointer to the stored type. 2096 Value *BC = 2097 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); 2098 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8); 2099 LI->setMetadata(M->getMDKindID("nontemporal"), Node); 2100 Rep = LI; 2101 } else if (IsX86 && 2102 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") || 2103 Name.startswith("avx512.mask.pavg"))) { 2104 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w, 2105 // llvm.x86.avx512.mask.pavg.b/w 2106 Value *A = CI->getArgOperand(0); 2107 Value *B = CI->getArgOperand(1); 2108 VectorType *ZextType = VectorType::getExtendedElementVectorType( 2109 cast<VectorType>(A->getType())); 2110 Value *ExtendedA = Builder.CreateZExt(A, ZextType); 2111 Value *ExtendedB = Builder.CreateZExt(B, ZextType); 2112 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB); 2113 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1)); 2114 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1)); 2115 Rep = Builder.CreateTrunc(ShiftR, A->getType()); 2116 if (CI->getNumArgOperands() > 2) { 2117 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2118 CI->getArgOperand(2)); 2119 } 2120 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { 2121 Value *Arg = CI->getArgOperand(0); 2122 Value *Neg = Builder.CreateNeg(Arg, "neg"); 2123 Value *Cmp = Builder.CreateICmpSGE( 2124 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); 2125 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); 2126 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || 2127 Name == "max.ui" || Name == "max.ull")) { 2128 Value *Arg0 
= CI->getArgOperand(0); 2129 Value *Arg1 = CI->getArgOperand(1); 2130 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 2131 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") 2132 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); 2133 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); 2134 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" || 2135 Name == "min.ui" || Name == "min.ull")) { 2136 Value *Arg0 = CI->getArgOperand(0); 2137 Value *Arg1 = CI->getArgOperand(1); 2138 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 2139 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") 2140 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); 2141 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); 2142 } else if (IsNVVM && Name == "clz.ll") { 2143 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64. 2144 Value *Arg = CI->getArgOperand(0); 2145 Value *Ctlz = Builder.CreateCall( 2146 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 2147 {Arg->getType()}), 2148 {Arg, Builder.getFalse()}, "ctlz"); 2149 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); 2150 } else if (IsNVVM && Name == "popc.ll") { 2151 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an 2152 // i64. 
2153 Value *Arg = CI->getArgOperand(0); 2154 Value *Popc = Builder.CreateCall( 2155 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 2156 {Arg->getType()}), 2157 Arg, "ctpop"); 2158 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); 2159 } else if (IsNVVM && Name == "h2f") { 2160 Rep = Builder.CreateCall(Intrinsic::getDeclaration( 2161 F->getParent(), Intrinsic::convert_from_fp16, 2162 {Builder.getFloatTy()}), 2163 CI->getArgOperand(0), "h2f"); 2164 } else { 2165 llvm_unreachable("Unknown function for CallInst upgrade."); 2166 } 2167 2168 if (Rep) 2169 CI->replaceAllUsesWith(Rep); 2170 CI->eraseFromParent(); 2171 return; 2172 } 2173 2174 CallInst *NewCall = nullptr; 2175 switch (NewFn->getIntrinsicID()) { 2176 default: { 2177 // Handle generic mangling change, but nothing else 2178 assert( 2179 (CI->getCalledFunction()->getName() != NewFn->getName()) && 2180 "Unknown function for CallInst upgrade and isn't just a name change"); 2181 CI->setCalledFunction(NewFn); 2182 return; 2183 } 2184 2185 case Intrinsic::arm_neon_vld1: 2186 case Intrinsic::arm_neon_vld2: 2187 case Intrinsic::arm_neon_vld3: 2188 case Intrinsic::arm_neon_vld4: 2189 case Intrinsic::arm_neon_vld2lane: 2190 case Intrinsic::arm_neon_vld3lane: 2191 case Intrinsic::arm_neon_vld4lane: 2192 case Intrinsic::arm_neon_vst1: 2193 case Intrinsic::arm_neon_vst2: 2194 case Intrinsic::arm_neon_vst3: 2195 case Intrinsic::arm_neon_vst4: 2196 case Intrinsic::arm_neon_vst2lane: 2197 case Intrinsic::arm_neon_vst3lane: 2198 case Intrinsic::arm_neon_vst4lane: { 2199 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2200 CI->arg_operands().end()); 2201 NewCall = Builder.CreateCall(NewFn, Args); 2202 break; 2203 } 2204 2205 case Intrinsic::bitreverse: 2206 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2207 break; 2208 2209 case Intrinsic::ctlz: 2210 case Intrinsic::cttz: 2211 assert(CI->getNumArgOperands() == 1 && 2212 "Mismatch between function args and call args"); 
2213 NewCall = 2214 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); 2215 break; 2216 2217 case Intrinsic::objectsize: { 2218 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 2219 ? Builder.getFalse() 2220 : CI->getArgOperand(2); 2221 NewCall = Builder.CreateCall( 2222 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize}); 2223 break; 2224 } 2225 2226 case Intrinsic::ctpop: 2227 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2228 break; 2229 2230 case Intrinsic::convert_from_fp16: 2231 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 2232 break; 2233 2234 case Intrinsic::dbg_value: 2235 // Upgrade from the old version that had an extra offset argument. 2236 assert(CI->getNumArgOperands() == 4); 2237 // Drop nonzero offsets instead of attempting to upgrade them. 2238 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) 2239 if (Offset->isZeroValue()) { 2240 NewCall = Builder.CreateCall( 2241 NewFn, 2242 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); 2243 break; 2244 } 2245 CI->eraseFromParent(); 2246 return; 2247 2248 case Intrinsic::x86_xop_vfrcz_ss: 2249 case Intrinsic::x86_xop_vfrcz_sd: 2250 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); 2251 break; 2252 2253 case Intrinsic::x86_xop_vpermil2pd: 2254 case Intrinsic::x86_xop_vpermil2ps: 2255 case Intrinsic::x86_xop_vpermil2pd_256: 2256 case Intrinsic::x86_xop_vpermil2ps_256: { 2257 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2258 CI->arg_operands().end()); 2259 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 2260 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 2261 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 2262 NewCall = Builder.CreateCall(NewFn, Args); 2263 break; 2264 } 2265 2266 case Intrinsic::x86_sse41_ptestc: 2267 case Intrinsic::x86_sse41_ptestz: 2268 case Intrinsic::x86_sse41_ptestnzc: { 2269 // The arguments for these intrinsics used to be 
v4f32, and changed 2270 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 2271 // So, the only thing required is a bitcast for both arguments. 2272 // First, check the arguments have the old type. 2273 Value *Arg0 = CI->getArgOperand(0); 2274 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 2275 return; 2276 2277 // Old intrinsic, add bitcasts 2278 Value *Arg1 = CI->getArgOperand(1); 2279 2280 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 2281 2282 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 2283 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 2284 2285 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); 2286 break; 2287 } 2288 2289 case Intrinsic::x86_sse41_insertps: 2290 case Intrinsic::x86_sse41_dppd: 2291 case Intrinsic::x86_sse41_dpps: 2292 case Intrinsic::x86_sse41_mpsadbw: 2293 case Intrinsic::x86_avx_dp_ps_256: 2294 case Intrinsic::x86_avx2_mpsadbw: { 2295 // Need to truncate the last argument from i32 to i8 -- this argument models 2296 // an inherently 8-bit immediate operand to these x86 instructions. 2297 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2298 CI->arg_operands().end()); 2299 2300 // Replace the last argument with a trunc. 
2301 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 2302 NewCall = Builder.CreateCall(NewFn, Args); 2303 break; 2304 } 2305 2306 case Intrinsic::thread_pointer: { 2307 NewCall = Builder.CreateCall(NewFn, {}); 2308 break; 2309 } 2310 2311 case Intrinsic::invariant_start: 2312 case Intrinsic::invariant_end: 2313 case Intrinsic::masked_load: 2314 case Intrinsic::masked_store: 2315 case Intrinsic::masked_gather: 2316 case Intrinsic::masked_scatter: { 2317 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 2318 CI->arg_operands().end()); 2319 NewCall = Builder.CreateCall(NewFn, Args); 2320 break; 2321 } 2322 } 2323 assert(NewCall && "Should have either set this variable or returned through " 2324 "the default case"); 2325 std::string Name = CI->getName(); 2326 if (!Name.empty()) { 2327 CI->setName(Name + ".old"); 2328 NewCall->setName(Name); 2329 } 2330 CI->replaceAllUsesWith(NewCall); 2331 CI->eraseFromParent(); 2332 } 2333 2334 void llvm::UpgradeCallsToIntrinsic(Function *F) { 2335 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 2336 2337 // Check if this function should be upgraded and get the replacement function 2338 // if there is one. 2339 Function *NewFn; 2340 if (UpgradeIntrinsicFunction(F, NewFn)) { 2341 // Replace all users of the old function with the new function or new 2342 // instructions. This is not a range loop because the call is deleted. 2343 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 2344 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 2345 UpgradeIntrinsicCall(CI, NewFn); 2346 2347 // Remove old function, no longer used, from the module. 2348 F->eraseFromParent(); 2349 } 2350 } 2351 2352 MDNode *llvm::UpgradeTBAANode(MDNode &MD) { 2353 // Check if the tag uses struct-path aware TBAA format. 
2354 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3) 2355 return &MD; 2356 2357 auto &Context = MD.getContext(); 2358 if (MD.getNumOperands() == 3) { 2359 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)}; 2360 MDNode *ScalarType = MDNode::get(Context, Elts); 2361 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 2362 Metadata *Elts2[] = {ScalarType, ScalarType, 2363 ConstantAsMetadata::get( 2364 Constant::getNullValue(Type::getInt64Ty(Context))), 2365 MD.getOperand(2)}; 2366 return MDNode::get(Context, Elts2); 2367 } 2368 // Create a MDNode <MD, MD, offset 0> 2369 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue( 2370 Type::getInt64Ty(Context)))}; 2371 return MDNode::get(Context, Elts); 2372 } 2373 2374 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 2375 Instruction *&Temp) { 2376 if (Opc != Instruction::BitCast) 2377 return nullptr; 2378 2379 Temp = nullptr; 2380 Type *SrcTy = V->getType(); 2381 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 2382 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 2383 LLVMContext &Context = V->getContext(); 2384 2385 // We have no information about target data layout, so we assume that 2386 // the maximum pointer size is 64bit. 
2387 Type *MidTy = Type::getInt64Ty(Context); 2388 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 2389 2390 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 2391 } 2392 2393 return nullptr; 2394 } 2395 2396 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 2397 if (Opc != Instruction::BitCast) 2398 return nullptr; 2399 2400 Type *SrcTy = C->getType(); 2401 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 2402 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 2403 LLVMContext &Context = C->getContext(); 2404 2405 // We have no information about target data layout, so we assume that 2406 // the maximum pointer size is 64bit. 2407 Type *MidTy = Type::getInt64Ty(Context); 2408 2409 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 2410 DestTy); 2411 } 2412 2413 return nullptr; 2414 } 2415 2416 /// Check the debug info version number, if it is out-dated, drop the debug 2417 /// info. Return true if module is modified. 2418 bool llvm::UpgradeDebugInfo(Module &M) { 2419 unsigned Version = getDebugMetadataVersionFromModule(M); 2420 if (Version == DEBUG_METADATA_VERSION) { 2421 bool BrokenDebugInfo = false; 2422 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo)) 2423 report_fatal_error("Broken module found, compilation aborted!"); 2424 if (!BrokenDebugInfo) 2425 // Everything is ok. 2426 return false; 2427 else { 2428 // Diagnose malformed debug info. 2429 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); 2430 M.getContext().diagnose(Diag); 2431 } 2432 } 2433 bool Modified = StripDebugInfo(M); 2434 if (Modified && Version != DEBUG_METADATA_VERSION) { 2435 // Diagnose a version mismatch. 
2436 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 2437 M.getContext().diagnose(DiagVersion); 2438 } 2439 return Modified; 2440 } 2441 2442 bool llvm::UpgradeModuleFlags(Module &M) { 2443 NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 2444 if (!ModFlags) 2445 return false; 2446 2447 bool HasObjCFlag = false, HasClassProperties = false, Changed = false; 2448 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { 2449 MDNode *Op = ModFlags->getOperand(I); 2450 if (Op->getNumOperands() != 3) 2451 continue; 2452 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); 2453 if (!ID) 2454 continue; 2455 if (ID->getString() == "Objective-C Image Info Version") 2456 HasObjCFlag = true; 2457 if (ID->getString() == "Objective-C Class Properties") 2458 HasClassProperties = true; 2459 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two 2460 // field was Error and now they are Max. 2461 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") { 2462 if (auto *Behavior = 2463 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) { 2464 if (Behavior->getLimitedValue() == Module::Error) { 2465 Type *Int32Ty = Type::getInt32Ty(M.getContext()); 2466 Metadata *Ops[3] = { 2467 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)), 2468 MDString::get(M.getContext(), ID->getString()), 2469 Op->getOperand(2)}; 2470 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 2471 Changed = true; 2472 } 2473 } 2474 } 2475 // Upgrade Objective-C Image Info Section. Removed the whitespce in the 2476 // section name so that llvm-lto will not complain about mismatching 2477 // module flags that is functionally the same. 
2478 if (ID->getString() == "Objective-C Image Info Section") { 2479 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) { 2480 SmallVector<StringRef, 4> ValueComp; 2481 Value->getString().split(ValueComp, " "); 2482 if (ValueComp.size() != 1) { 2483 std::string NewValue; 2484 for (auto &S : ValueComp) 2485 NewValue += S.str(); 2486 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1), 2487 MDString::get(M.getContext(), NewValue)}; 2488 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 2489 Changed = true; 2490 } 2491 } 2492 } 2493 } 2494 2495 // "Objective-C Class Properties" is recently added for Objective-C. We 2496 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 2497 // flag of value 0, so we can correclty downgrade this flag when trying to 2498 // link an ObjC bitcode without this module flag with an ObjC bitcode with 2499 // this module flag. 2500 if (HasObjCFlag && !HasClassProperties) { 2501 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties", 2502 (uint32_t)0); 2503 Changed = true; 2504 } 2505 2506 return Changed; 2507 } 2508 2509 void llvm::UpgradeSectionAttributes(Module &M) { 2510 auto TrimSpaces = [](StringRef Section) -> std::string { 2511 SmallVector<StringRef, 5> Components; 2512 Section.split(Components, ','); 2513 2514 SmallString<32> Buffer; 2515 raw_svector_ostream OS(Buffer); 2516 2517 for (auto Component : Components) 2518 OS << ',' << Component.trim(); 2519 2520 return OS.str().substr(1); 2521 }; 2522 2523 for (auto &GV : M.globals()) { 2524 if (!GV.hasSection()) 2525 continue; 2526 2527 StringRef Section = GV.getSection(); 2528 2529 if (!Section.startswith("__DATA, __objc_catlist")) 2530 continue; 2531 2532 // __DATA, __objc_catlist, regular, no_dead_strip 2533 // __DATA,__objc_catlist,regular,no_dead_strip 2534 GV.setSection(TrimSpaces(Section)); 2535 } 2536 } 2537 2538 static bool isOldLoopArgument(Metadata *MD) { 2539 auto *T = dyn_cast_or_null<MDTuple>(MD); 2540 if 
(!T) 2541 return false; 2542 if (T->getNumOperands() < 1) 2543 return false; 2544 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 2545 if (!S) 2546 return false; 2547 return S->getString().startswith("llvm.vectorizer."); 2548 } 2549 2550 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 2551 StringRef OldPrefix = "llvm.vectorizer."; 2552 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 2553 2554 if (OldTag == "llvm.vectorizer.unroll") 2555 return MDString::get(C, "llvm.loop.interleave.count"); 2556 2557 return MDString::get( 2558 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 2559 .str()); 2560 } 2561 2562 static Metadata *upgradeLoopArgument(Metadata *MD) { 2563 auto *T = dyn_cast_or_null<MDTuple>(MD); 2564 if (!T) 2565 return MD; 2566 if (T->getNumOperands() < 1) 2567 return MD; 2568 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 2569 if (!OldTag) 2570 return MD; 2571 if (!OldTag->getString().startswith("llvm.vectorizer.")) 2572 return MD; 2573 2574 // This has an old tag. Upgrade it. 2575 SmallVector<Metadata *, 8> Ops; 2576 Ops.reserve(T->getNumOperands()); 2577 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 2578 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 2579 Ops.push_back(T->getOperand(I)); 2580 2581 return MDTuple::get(T->getContext(), Ops); 2582 } 2583 2584 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 2585 auto *T = dyn_cast<MDTuple>(&N); 2586 if (!T) 2587 return &N; 2588 2589 if (none_of(T->operands(), isOldLoopArgument)) 2590 return &N; 2591 2592 SmallVector<Metadata *, 8> Ops; 2593 Ops.reserve(T->getNumOperands()); 2594 for (Metadata *MD : T->operands()) 2595 Ops.push_back(upgradeLoopArgument(MD)); 2596 2597 return MDTuple::get(T->getContext(), Ops); 2598 } 2599