1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the auto-upgrade helper functions. 11 // This is where deprecated IR intrinsics and other IR features are updated to 12 // current specifications. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/IR/AutoUpgrade.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/IR/Constants.h" 19 #include "llvm/IR/DIBuilder.h" 20 #include "llvm/IR/DebugInfo.h" 21 #include "llvm/IR/DiagnosticInfo.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/IRBuilder.h" 24 #include "llvm/IR/Instruction.h" 25 #include "llvm/IR/IntrinsicInst.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/IR/Verifier.h" 29 #include "llvm/Support/ErrorHandling.h" 30 #include "llvm/Support/Regex.h" 31 #include <cstring> 32 using namespace llvm; 33 34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); } 35 36 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have 37 // changed their type from v4f32 to v2i64. 38 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID, 39 Function *&NewFn) { 40 // Check whether this is an old version of the function, which received 41 // v4f32 arguments. 42 Type *Arg0Type = F->getFunctionType()->getParamType(0); 43 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) 44 return false; 45 46 // Yes, it's old, replace it with new version. 47 rename(F); 48 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 49 return true; 50 } 51 52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask 53 // arguments have changed their type from i32 to i8. 54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, 55 Function *&NewFn) { 56 // Check that the last argument is an i32. 57 Type *LastArgType = F->getFunctionType()->getParamType( 58 F->getFunctionType()->getNumParams() - 1); 59 if (!LastArgType->isIntegerTy(32)) 60 return false; 61 62 // Move this function aside and map down. 63 rename(F); 64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 65 return true; 66 } 67 68 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { 69 // All of the intrinsics matches below should be marked with which llvm 70 // version started autoupgrading them. At some point in the future we would 71 // like to use this information to remove upgrade code for some older 72 // intrinsics. It is currently undecided how we will determine that future 73 // point. 74 if (Name == "addcarryx.u32" || // Added in 8.0 75 Name == "addcarryx.u64" || // Added in 8.0 76 Name == "addcarry.u32" || // Added in 8.0 77 Name == "addcarry.u64" || // Added in 8.0 78 Name == "subborrow.u32" || // Added in 8.0 79 Name == "subborrow.u64" || // Added in 8.0 80 Name.startswith("sse2.paddus.") || // Added in 8.0 81 Name.startswith("sse2.psubus.") || // Added in 8.0 82 Name.startswith("avx2.paddus.") || // Added in 8.0 83 Name.startswith("avx2.psubus.") || // Added in 8.0 84 Name.startswith("avx512.mask.paddus.") || // Added in 8.0 85 Name.startswith("avx512.mask.psubus.") || // Added in 8.0 86 Name=="ssse3.pabs.b.128" || // Added in 6.0 87 Name=="ssse3.pabs.w.128" || // Added in 6.0 88 Name=="ssse3.pabs.d.128" || // Added in 6.0 89 Name.startswith("fma4.vfmadd.s") || // Added in 7.0 90 Name.startswith("fma.vfmadd.") || // Added in 7.0 91 Name.startswith("fma.vfmsub.") || // Added in 7.0 92 Name.startswith("fma.vfmaddsub.") || // Added in 7.0 93 Name.startswith("fma.vfmsubadd.") || // Added in 7.0 94 Name.startswith("fma.vfnmadd.") || // Added in 7.0 95 Name.startswith("fma.vfnmsub.") || // Added in 7.0 96 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0 97 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0 98 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0 99 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0 100 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0 101 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0 102 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0 103 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0 104 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0 105 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0 106 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0 107 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 108 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 109 Name.startswith("avx512.kunpck") || //added in 6.0 110 Name.startswith("avx2.pabs.") || // Added in 6.0 111 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 112 Name.startswith("avx512.broadcastm") || // Added in 6.0 113 Name == "sse.sqrt.ss" || // Added in 7.0 114 Name == "sse2.sqrt.sd" || // Added in 7.0 115 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0 116 Name.startswith("avx.sqrt.p") || // Added in 7.0 117 Name.startswith("sse2.sqrt.p") || // Added in 7.0 118 Name.startswith("sse.sqrt.p") || // Added in 7.0 119 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 120 Name.startswith("sse2.pcmpeq.") || // Added in 3.1 121 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 122 Name.startswith("avx2.pcmpeq.") || // Added in 3.1 123 Name.startswith("avx2.pcmpgt.") || // Added in 3.1 124 Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9 125 Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9 126 Name.startswith("avx.vperm2f128.") || // Added in 6.0 127 Name == "avx2.vperm2i128" || // Added in 6.0 128 Name == "sse.add.ss" || // Added in 4.0 129 Name == "sse2.add.sd" || // Added in 4.0 130 Name == "sse.sub.ss" || // Added in 4.0 131 Name == "sse2.sub.sd" || // Added in 4.0 132 Name == "sse.mul.ss" || // Added in 4.0 133 Name == "sse2.mul.sd" || // Added in 4.0 134 Name == "sse.div.ss" || // Added in 4.0 135 Name == "sse2.div.sd" || // Added in 4.0 136 Name == "sse41.pmaxsb" || // Added in 3.9 137 Name == "sse2.pmaxs.w" || // Added in 3.9 138 Name == "sse41.pmaxsd" || // Added in 3.9 139 Name == "sse2.pmaxu.b" || // Added in 3.9 140 Name == "sse41.pmaxuw" || // Added in 3.9 141 Name == "sse41.pmaxud" || // Added in 3.9 142 Name == "sse41.pminsb" || // Added in 3.9 143 Name == "sse2.pmins.w" || // Added in 3.9 144 Name == "sse41.pminsd" || // Added in 3.9 145 Name == "sse2.pminu.b" || // Added in 3.9 146 Name == "sse41.pminuw" || // Added in 3.9 147 Name == "sse41.pminud" || // Added in 3.9 148 Name == "avx512.kand.w" || // Added in 7.0 149 Name == "avx512.kandn.w" || // Added in 7.0 150 Name == "avx512.knot.w" || // Added in 7.0 151 Name == "avx512.kor.w" || // Added in 7.0 152 Name == "avx512.kxor.w" || // Added in 7.0 153 Name == "avx512.kxnor.w" || // Added in 7.0 154 Name == "avx512.kortestc.w" || // Added in 7.0 155 Name == "avx512.kortestz.w" || // Added in 7.0 156 Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0 157 Name.startswith("avx2.pmax") || // Added in 3.9 158 Name.startswith("avx2.pmin") || // Added in 3.9 159 Name.startswith("avx512.mask.pmax") || // Added in 4.0 160 Name.startswith("avx512.mask.pmin") || // Added in 4.0 161 Name.startswith("avx2.vbroadcast") || // Added in 3.8 162 Name.startswith("avx2.pbroadcast") || // Added in 3.8 163 Name.startswith("avx.vpermil.") || // Added in 3.1 164 Name.startswith("sse2.pshuf") || // Added in 3.9 165 Name.startswith("avx512.pbroadcast") || // Added in 3.9 166 Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9 167 Name.startswith("avx512.mask.movddup") || // Added in 3.9 168 Name.startswith("avx512.mask.movshdup") || // Added in 3.9 169 Name.startswith("avx512.mask.movsldup") || // Added in 3.9 170 Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9 171 Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9 172 Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9 173 Name.startswith("avx512.mask.shuf.p") || // Added in 4.0 174 Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9 175 Name.startswith("avx512.mask.perm.df.") || // Added in 3.9 176 Name.startswith("avx512.mask.perm.di.") || // Added in 3.9 177 Name.startswith("avx512.mask.punpckl") || // Added in 3.9 178 Name.startswith("avx512.mask.punpckh") || // Added in 3.9 179 Name.startswith("avx512.mask.unpckl.") || // Added in 3.9 180 Name.startswith("avx512.mask.unpckh.") || // Added in 3.9 181 Name.startswith("avx512.mask.pand.") || // Added in 3.9 182 Name.startswith("avx512.mask.pandn.") || // Added in 3.9 183 Name.startswith("avx512.mask.por.") || // Added in 3.9 184 Name.startswith("avx512.mask.pxor.") || // Added in 3.9 185 Name.startswith("avx512.mask.and.") || // Added in 3.9 186 Name.startswith("avx512.mask.andn.") || // Added in 3.9 187 Name.startswith("avx512.mask.or.") || // Added in 3.9 188 Name.startswith("avx512.mask.xor.") || // Added in 3.9 189 Name.startswith("avx512.mask.padd.") || // Added in 4.0 190 Name.startswith("avx512.mask.psub.") || // Added in 4.0 191 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 192 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 193 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 194 Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0 195 Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0 196 Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0 197 Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0 198 Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0 199 Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0 200 Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0 201 Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0 202 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0 203 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0 204 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0 205 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0 206 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0 207 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0 208 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0 209 Name == "avx512.cvtusi2sd" || // Added in 7.0 210 Name.startswith("avx512.mask.permvar.") || // Added in 7.0 211 Name.startswith("avx512.mask.permvar.") || // Added in 7.0 212 Name == "sse2.pmulu.dq" || // Added in 7.0 213 Name == "sse41.pmuldq" || // Added in 7.0 214 Name == "avx2.pmulu.dq" || // Added in 7.0 215 Name == "avx2.pmul.dq" || // Added in 7.0 216 Name == "avx512.pmulu.dq.512" || // Added in 7.0 217 Name == "avx512.pmul.dq.512" || // Added in 7.0 218 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 219 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 220 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 221 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0 222 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0 223 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0 224 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0 225 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 226 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 227 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 228 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 229 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 230 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 231 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 232 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 233 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0 234 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 235 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 236 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 237 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 238 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 239 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 240 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 241 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 242 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 243 Name.startswith("avx512.mask.psra.d") || // Added in 4.0 244 Name.startswith("avx512.mask.psra.q") || // Added in 4.0 245 Name.startswith("avx512.mask.psra.w") || // Added in 4.0 246 Name.startswith("avx512.mask.psrl.d") || // Added in 4.0 247 Name.startswith("avx512.mask.psrl.q") || // Added in 4.0 248 Name.startswith("avx512.mask.psrl.w") || // Added in 4.0 249 Name.startswith("avx512.mask.pslli") || // Added in 4.0 250 Name.startswith("avx512.mask.psrai") || // Added in 4.0 251 Name.startswith("avx512.mask.psrli") || // Added in 4.0 252 Name.startswith("avx512.mask.psllv") || // Added in 4.0 253 Name.startswith("avx512.mask.psrav") || // Added in 4.0 254 Name.startswith("avx512.mask.psrlv") || // Added in 4.0 255 Name.startswith("sse41.pmovsx") || // Added in 3.8 256 Name.startswith("sse41.pmovzx") || // Added in 3.9 257 Name.startswith("avx2.pmovsx") || // Added in 3.9 258 Name.startswith("avx2.pmovzx") || // Added in 3.9 259 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 260 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 261 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 262 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0 263 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 264 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 265 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 266 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0 267 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0 268 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0 269 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0 270 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0 271 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0 272 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0 273 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0 274 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0 275 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0 276 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0 277 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0 278 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0 279 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0 280 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 281 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 282 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 283 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 284 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 285 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0 286 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 287 Name.startswith("avx512.mask.padds.") || // Added in 8.0 288 Name.startswith("avx512.mask.psubs.") || // Added in 8.0 289 Name == "sse.cvtsi2ss" || // Added in 7.0 290 Name == "sse.cvtsi642ss" || // Added in 7.0 291 Name == "sse2.cvtsi2sd" || // Added in 7.0 292 Name == "sse2.cvtsi642sd" || // Added in 7.0 293 Name == "sse2.cvtss2sd" || // Added in 7.0 294 Name == "sse2.cvtdq2pd" || // Added in 3.9 295 Name == "sse2.cvtdq2ps" || // Added in 7.0 296 Name == "sse2.cvtps2pd" || // Added in 3.9 297 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 298 Name == "avx.cvtdq2.ps.256" || // Added in 7.0 299 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 300 Name.startswith("avx.vinsertf128.") || // Added in 3.7 301 Name == "avx2.vinserti128" || // Added in 3.7 302 Name.startswith("avx512.mask.insert") || // Added in 4.0 303 Name.startswith("avx.vextractf128.") || // Added in 3.7 304 Name == "avx2.vextracti128" || // Added in 3.7 305 Name.startswith("avx512.mask.vextract") || // Added in 4.0 306 Name.startswith("sse4a.movnt.") || // Added in 3.9 307 Name.startswith("avx.movnt.") || // Added in 3.2 308 Name.startswith("avx512.storent.") || // Added in 3.9 309 Name == "sse41.movntdqa" || // Added in 5.0 310 Name == "avx2.movntdqa" || // Added in 5.0 311 Name == "avx512.movntdqa" || // Added in 5.0 312 Name == "sse2.storel.dq" || // Added in 3.9 313 Name.startswith("sse.storeu.") || // Added in 3.9 314 Name.startswith("sse2.storeu.") || // Added in 3.9 315 Name.startswith("avx.storeu.") || // Added in 3.9 316 Name.startswith("avx512.mask.storeu.") || // Added in 3.9 317 Name.startswith("avx512.mask.store.p") || // Added in 3.9 318 Name.startswith("avx512.mask.store.b.") || // Added in 3.9 319 Name.startswith("avx512.mask.store.w.") || // Added in 3.9 320 Name.startswith("avx512.mask.store.d.") || // Added in 3.9 321 Name.startswith("avx512.mask.store.q.") || // Added in 3.9 322 Name == "avx512.mask.store.ss" || // Added in 7.0 323 Name.startswith("avx512.mask.loadu.") || // Added in 3.9 324 Name.startswith("avx512.mask.load.") || // Added in 3.9 325 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0 326 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0 327 Name == "sse42.crc32.64.8" || // Added in 3.4 328 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 329 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 330 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 331 Name.startswith("avx512.mask.valign.") || // Added in 4.0 332 Name.startswith("sse2.psll.dq") || // Added in 3.7 333 Name.startswith("sse2.psrl.dq") || // Added in 3.7 334 Name.startswith("avx2.psll.dq") || // Added in 3.7 335 Name.startswith("avx2.psrl.dq") || // Added in 3.7 336 Name.startswith("avx512.psll.dq") || // Added in 3.9 337 Name.startswith("avx512.psrl.dq") || // Added in 3.9 338 Name == "sse41.pblendw" || // Added in 3.7 339 Name.startswith("sse41.blendp") || // Added in 3.7 340 Name.startswith("avx.blend.p") || // Added in 3.7 341 Name == "avx2.pblendw" || // Added in 3.7 342 Name.startswith("avx2.pblendd.") || // Added in 3.7 343 Name.startswith("avx.vbroadcastf128") || // Added in 4.0 344 Name == "avx2.vbroadcasti128" || // Added in 3.7 345 Name.startswith("avx512.mask.broadcastf") || // Added in 6.0 346 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 347 Name == "xop.vpcmov" || // Added in 3.8 348 Name == "xop.vpcmov.256" || // Added in 5.0 349 Name.startswith("avx512.mask.move.s") || // Added in 4.0 350 Name.startswith("avx512.cvtmask2") || // Added in 5.0 351 (Name.startswith("xop.vpcom") && // Added in 3.2 352 F->arg_size() == 2) || 353 Name.startswith("xop.vprot") || // Added in 8.0 354 Name.startswith("avx512.prol") || // Added in 8.0 355 Name.startswith("avx512.pror") || // Added in 8.0 356 Name.startswith("avx512.mask.prorv.") || // Added in 8.0 357 Name.startswith("avx512.mask.pror.") || // Added in 8.0 358 Name.startswith("avx512.mask.prolv.") || // Added in 8.0 359 Name.startswith("avx512.mask.prol.") || // Added in 8.0 360 Name.startswith("avx512.ptestm") || //Added in 6.0 361 Name.startswith("avx512.ptestnm") || //Added in 6.0 362 Name.startswith("sse2.pavg") || // Added in 6.0 363 Name.startswith("avx2.pavg") || // Added in 6.0 364 Name.startswith("avx512.mask.pavg")) // Added in 6.0 365 return true; 366 367 return false; 368 } 369 370 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, 371 Function *&NewFn) { 372 // Only handle intrinsics that start with "x86.". 373 if (!Name.startswith("x86.")) 374 return false; 375 // Remove "x86." prefix. 376 Name = Name.substr(4); 377 378 if (ShouldUpgradeX86Intrinsic(F, Name)) { 379 NewFn = nullptr; 380 return true; 381 } 382 383 if (Name == "rdtscp") { // Added in 8.0 384 // If this intrinsic has 0 operands, it's the new version. 385 if (F->getFunctionType()->getNumParams() == 0) 386 return false; 387 388 rename(F); 389 NewFn = Intrinsic::getDeclaration(F->getParent(), 390 Intrinsic::x86_rdtscp); 391 return true; 392 } 393 394 // SSE4.1 ptest functions may have an old signature. 395 if (Name.startswith("sse41.ptest")) { // Added in 3.2 396 if (Name.substr(11) == "c") 397 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn); 398 if (Name.substr(11) == "z") 399 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn); 400 if (Name.substr(11) == "nzc") 401 return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn); 402 } 403 // Several blend and other instructions with masks used the wrong number of 404 // bits. 405 if (Name == "sse41.insertps") // Added in 3.6 406 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps, 407 NewFn); 408 if (Name == "sse41.dppd") // Added in 3.6 409 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd, 410 NewFn); 411 if (Name == "sse41.dpps") // Added in 3.6 412 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps, 413 NewFn); 414 if (Name == "sse41.mpsadbw") // Added in 3.6 415 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw, 416 NewFn); 417 if (Name == "avx.dp.ps.256") // Added in 3.6 418 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 419 NewFn); 420 if (Name == "avx2.mpsadbw") // Added in 3.6 421 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 422 NewFn); 423 424 // frcz.ss/sd may need to have an argument dropped. Added in 3.2 425 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { 426 rename(F); 427 NewFn = Intrinsic::getDeclaration(F->getParent(), 428 Intrinsic::x86_xop_vfrcz_ss); 429 return true; 430 } 431 if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) { 432 rename(F); 433 NewFn = Intrinsic::getDeclaration(F->getParent(), 434 Intrinsic::x86_xop_vfrcz_sd); 435 return true; 436 } 437 // Upgrade any XOP PERMIL2 index operand still using a float/double vector. 438 if (Name.startswith("xop.vpermil2")) { // Added in 3.9 439 auto Idx = F->getFunctionType()->getParamType(2); 440 if (Idx->isFPOrFPVectorTy()) { 441 rename(F); 442 unsigned IdxSize = Idx->getPrimitiveSizeInBits(); 443 unsigned EltSize = Idx->getScalarSizeInBits(); 444 Intrinsic::ID Permil2ID; 445 if (EltSize == 64 && IdxSize == 128) 446 Permil2ID = Intrinsic::x86_xop_vpermil2pd; 447 else if (EltSize == 32 && IdxSize == 128) 448 Permil2ID = Intrinsic::x86_xop_vpermil2ps; 449 else if (EltSize == 64 && IdxSize == 256) 450 Permil2ID = Intrinsic::x86_xop_vpermil2pd_256; 451 else 452 Permil2ID = Intrinsic::x86_xop_vpermil2ps_256; 453 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); 454 return true; 455 } 456 } 457 458 return false; 459 } 460 461 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 462 assert(F && "Illegal to upgrade a non-existent Function."); 463 464 // Quickly eliminate it, if it's not a candidate. 465 StringRef Name = F->getName(); 466 if (Name.size() <= 8 || !Name.startswith("llvm.")) 467 return false; 468 Name = Name.substr(5); // Strip off "llvm." 469 470 switch (Name[0]) { 471 default: break; 472 case 'a': { 473 if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) { 474 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse, 475 F->arg_begin()->getType()); 476 return true; 477 } 478 if (Name.startswith("arm.neon.vclz")) { 479 Type* args[2] = { 480 F->arg_begin()->getType(), 481 Type::getInt1Ty(F->getContext()) 482 }; 483 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 484 // the end of the name. Change name from llvm.arm.neon.vclz.* to 485 // llvm.ctlz.* 486 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 487 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), 488 "llvm.ctlz." + Name.substr(14), F->getParent()); 489 return true; 490 } 491 if (Name.startswith("arm.neon.vcnt")) { 492 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 493 F->arg_begin()->getType()); 494 return true; 495 } 496 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$"); 497 if (vldRegex.match(Name)) { 498 auto fArgs = F->getFunctionType()->params(); 499 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 500 // Can't use Intrinsic::getDeclaration here as the return types might 501 // then only be structurally equal. 502 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 503 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), 504 "llvm." + Name + ".p0i8", F->getParent()); 505 return true; 506 } 507 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 508 if (vstRegex.match(Name)) { 509 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1, 510 Intrinsic::arm_neon_vst2, 511 Intrinsic::arm_neon_vst3, 512 Intrinsic::arm_neon_vst4}; 513 514 static const Intrinsic::ID StoreLaneInts[] = { 515 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane, 516 Intrinsic::arm_neon_vst4lane 517 }; 518 519 auto fArgs = F->getFunctionType()->params(); 520 Type *Tys[] = {fArgs[0], fArgs[1]}; 521 if (Name.find("lane") == StringRef::npos) 522 NewFn = Intrinsic::getDeclaration(F->getParent(), 523 StoreInts[fArgs.size() - 3], Tys); 524 else 525 NewFn = Intrinsic::getDeclaration(F->getParent(), 526 StoreLaneInts[fArgs.size() - 5], Tys); 527 return true; 528 } 529 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 530 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 531 return true; 532 } 533 break; 534 } 535 536 case 'c': { 537 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 538 rename(F); 539 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 540 F->arg_begin()->getType()); 541 return true; 542 } 543 if (Name.startswith("cttz.") && F->arg_size() == 1) { 544 rename(F); 545 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, 546 F->arg_begin()->getType()); 547 return true; 548 } 549 break; 550 } 551 case 'd': { 552 if (Name == "dbg.value" && F->arg_size() == 4) { 553 rename(F); 554 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); 555 return true; 556 } 557 break; 558 } 559 case 'i': 560 case 'l': { 561 bool IsLifetimeStart = Name.startswith("lifetime.start"); 562 if (IsLifetimeStart || Name.startswith("invariant.start")) { 563 Intrinsic::ID ID = IsLifetimeStart ? 564 Intrinsic::lifetime_start : Intrinsic::invariant_start; 565 auto Args = F->getFunctionType()->params(); 566 Type* ObjectPtr[1] = {Args[1]}; 567 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { 568 rename(F); 569 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 570 return true; 571 } 572 } 573 574 bool IsLifetimeEnd = Name.startswith("lifetime.end"); 575 if (IsLifetimeEnd || Name.startswith("invariant.end")) { 576 Intrinsic::ID ID = IsLifetimeEnd ? 577 Intrinsic::lifetime_end : Intrinsic::invariant_end; 578 579 auto Args = F->getFunctionType()->params(); 580 Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]}; 581 if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) { 582 rename(F); 583 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 584 return true; 585 } 586 } 587 if (Name.startswith("invariant.group.barrier")) { 588 // Rename invariant.group.barrier to launder.invariant.group 589 auto Args = F->getFunctionType()->params(); 590 Type* ObjectPtr[1] = {Args[0]}; 591 rename(F); 592 NewFn = Intrinsic::getDeclaration(F->getParent(), 593 Intrinsic::launder_invariant_group, ObjectPtr); 594 return true; 595 596 } 597 598 break; 599 } 600 case 'm': { 601 if (Name.startswith("masked.load.")) { 602 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; 603 if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) { 604 rename(F); 605 NewFn = Intrinsic::getDeclaration(F->getParent(), 606 Intrinsic::masked_load, 607 Tys); 608 return true; 609 } 610 } 611 if (Name.startswith("masked.store.")) { 612 auto Args = F->getFunctionType()->params(); 613 Type *Tys[] = { Args[0], Args[1] }; 614 if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) { 615 rename(F); 616 NewFn = Intrinsic::getDeclaration(F->getParent(), 617 Intrinsic::masked_store, 618 Tys); 619 return true; 620 } 621 } 622 // Renaming gather/scatter intrinsics with no address space overloading 623 // to the new overload which includes an address space 624 if (Name.startswith("masked.gather.")) { 625 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; 626 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) { 627 rename(F); 628 NewFn = Intrinsic::getDeclaration(F->getParent(), 629 Intrinsic::masked_gather, Tys); 630 return true; 631 } 632 } 633 if (Name.startswith("masked.scatter.")) { 634 auto Args = F->getFunctionType()->params(); 635 Type *Tys[] = {Args[0], Args[1]}; 636 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { 637 rename(F); 638 NewFn = Intrinsic::getDeclaration(F->getParent(), 639 Intrinsic::masked_scatter, Tys); 640 return true; 641 } 642 } 643 // Updating the memory intrinsics (memcpy/memmove/memset) that have an 644 // alignment parameter to embedding the alignment as an attribute of 645 // the pointer args. 646 if (Name.startswith("memcpy.") && F->arg_size() == 5) { 647 rename(F); 648 // Get the types of dest, src, and len 649 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); 650 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, 651 ParamTypes); 652 return true; 653 } 654 if (Name.startswith("memmove.") && F->arg_size() == 5) { 655 rename(F); 656 // Get the types of dest, src, and len 657 ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3); 658 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, 659 ParamTypes); 660 return true; 661 } 662 if (Name.startswith("memset.") && F->arg_size() == 5) { 663 rename(F); 664 // Get the types of dest, and len 665 const auto *FT = F->getFunctionType(); 666 Type *ParamTypes[2] = { 667 FT->getParamType(0), // Dest 668 FT->getParamType(2) // len 669 }; 670 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, 671 ParamTypes); 672 return true; 673 } 674 break; 675 } 676 case 'n': { 677 if (Name.startswith("nvvm.")) { 678 Name = Name.substr(5); 679 680 // The following nvvm intrinsics correspond exactly to an LLVM intrinsic. 681 Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name) 682 .Cases("brev32", "brev64", Intrinsic::bitreverse) 683 .Case("clz.i", Intrinsic::ctlz) 684 .Case("popc.i", Intrinsic::ctpop) 685 .Default(Intrinsic::not_intrinsic); 686 if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) { 687 NewFn = Intrinsic::getDeclaration(F->getParent(), IID, 688 {F->getReturnType()}); 689 return true; 690 } 691 692 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but 693 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall. 694 // 695 // TODO: We could add lohi.i2d. 696 bool Expand = StringSwitch<bool>(Name) 697 .Cases("abs.i", "abs.ll", true) 698 .Cases("clz.ll", "popc.ll", "h2f", true) 699 .Cases("max.i", "max.ll", "max.ui", "max.ull", true) 700 .Cases("min.i", "min.ll", "min.ui", "min.ull", true) 701 .Default(false); 702 if (Expand) { 703 NewFn = nullptr; 704 return true; 705 } 706 } 707 break; 708 } 709 case 'o': 710 // We only need to change the name to match the mangling including the 711 // address space. 712 if (Name.startswith("objectsize.")) { 713 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 714 if (F->arg_size() == 2 || 715 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 716 rename(F); 717 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, 718 Tys); 719 return true; 720 } 721 } 722 break; 723 724 case 's': 725 if (Name == "stackprotectorcheck") { 726 NewFn = nullptr; 727 return true; 728 } 729 break; 730 731 case 'x': 732 if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) 733 return true; 734 } 735 // Remangle our intrinsic since we upgrade the mangling 736 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F); 737 if (Result != None) { 738 NewFn = Result.getValue(); 739 return true; 740 } 741 742 // This may not belong here. This function is effectively being overloaded 743 // to both detect an intrinsic which needs upgrading, and to provide the 744 // upgraded form of the intrinsic. We should perhaps have two separate 745 // functions for this. 746 return false; 747 } 748 749 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { 750 NewFn = nullptr; 751 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); 752 assert(F != NewFn && "Intrinsic function upgraded to the same function"); 753 754 // Upgrade intrinsic attributes. This does not change the function. 755 if (NewFn) 756 F = NewFn; 757 if (Intrinsic::ID id = F->getIntrinsicID()) 758 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 759 return Upgraded; 760 } 761 762 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 763 // Nothing to do yet. 764 return false; 765 } 766 767 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them 768 // to byte shuffles. 769 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, 770 Value *Op, unsigned Shift) { 771 Type *ResultTy = Op->getType(); 772 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 773 774 // Bitcast from a 64-bit element type to a byte element type. 775 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 776 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 777 778 // We'll be shuffling in zeroes. 779 Value *Res = Constant::getNullValue(VecTy); 780 781 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 782 // we'll just return the zero vector. 783 if (Shift < 16) { 784 uint32_t Idxs[64]; 785 // 256/512-bit version is split into 2/4 16-byte lanes. 786 for (unsigned l = 0; l != NumElts; l += 16) 787 for (unsigned i = 0; i != 16; ++i) { 788 unsigned Idx = NumElts + i - Shift; 789 if (Idx < NumElts) 790 Idx -= NumElts - 16; // end of lane, switch operand. 791 Idxs[l + i] = Idx + l; 792 } 793 794 Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts)); 795 } 796 797 // Bitcast back to a 64-bit element type. 798 return Builder.CreateBitCast(Res, ResultTy, "cast"); 799 } 800 801 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them 802 // to byte shuffles. 803 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, 804 unsigned Shift) { 805 Type *ResultTy = Op->getType(); 806 unsigned NumElts = ResultTy->getVectorNumElements() * 8; 807 808 // Bitcast from a 64-bit element type to a byte element type. 809 Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts); 810 Op = Builder.CreateBitCast(Op, VecTy, "cast"); 811 812 // We'll be shuffling in zeroes. 813 Value *Res = Constant::getNullValue(VecTy); 814 815 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, 816 // we'll just return the zero vector. 817 if (Shift < 16) { 818 uint32_t Idxs[64]; 819 // 256/512-bit version is split into 2/4 16-byte lanes. 820 for (unsigned l = 0; l != NumElts; l += 16) 821 for (unsigned i = 0; i != 16; ++i) { 822 unsigned Idx = i + Shift; 823 if (Idx >= 16) 824 Idx += NumElts - 16; // end of lane, switch operand. 825 Idxs[l + i] = Idx + l; 826 } 827 828 Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts)); 829 } 830 831 // Bitcast back to a 64-bit element type. 832 return Builder.CreateBitCast(Res, ResultTy, "cast"); 833 } 834 835 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, 836 unsigned NumElts) { 837 llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), 838 cast<IntegerType>(Mask->getType())->getBitWidth()); 839 Mask = Builder.CreateBitCast(Mask, MaskTy); 840 841 // If we have less than 8 elements, then the starting mask was an i8 and 842 // we need to extract down to the right number of elements. 843 if (NumElts < 8) { 844 uint32_t Indices[4]; 845 for (unsigned i = 0; i != NumElts; ++i) 846 Indices[i] = i; 847 Mask = Builder.CreateShuffleVector(Mask, Mask, 848 makeArrayRef(Indices, NumElts), 849 "extract"); 850 } 851 852 return Mask; 853 } 854 855 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 856 Value *Op0, Value *Op1) { 857 // If the mask is all ones just emit the first operation. 858 if (const auto *C = dyn_cast<Constant>(Mask)) 859 if (C->isAllOnesValue()) 860 return Op0; 861 862 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 863 return Builder.CreateSelect(Mask, Op0, Op1); 864 } 865 866 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, 867 Value *Op0, Value *Op1) { 868 // If the mask is all ones just emit the first operation. 869 if (const auto *C = dyn_cast<Constant>(Mask)) 870 if (C->isAllOnesValue()) 871 return Op0; 872 873 llvm::VectorType *MaskTy = 874 llvm::VectorType::get(Builder.getInt1Ty(), 875 Mask->getType()->getIntegerBitWidth()); 876 Mask = Builder.CreateBitCast(Mask, MaskTy); 877 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); 878 return Builder.CreateSelect(Mask, Op0, Op1); 879 } 880 881 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. 882 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate 883 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes. 884 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, 885 Value *Op1, Value *Shift, 886 Value *Passthru, Value *Mask, 887 bool IsVALIGN) { 888 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue(); 889 890 unsigned NumElts = Op0->getType()->getVectorNumElements(); 891 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!"); 892 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!"); 893 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!"); 894 895 // Mask the immediate for VALIGN. 896 if (IsVALIGN) 897 ShiftVal &= (NumElts - 1); 898 899 // If palignr is shifting the pair of vectors more than the size of two 900 // lanes, emit zero. 901 if (ShiftVal >= 32) 902 return llvm::Constant::getNullValue(Op0->getType()); 903 904 // If palignr is shifting the pair of input vectors more than one lane, 905 // but less than two lanes, convert to shifting in zeroes. 906 if (ShiftVal > 16) { 907 ShiftVal -= 16; 908 Op1 = Op0; 909 Op0 = llvm::Constant::getNullValue(Op0->getType()); 910 } 911 912 uint32_t Indices[64]; 913 // 256-bit palignr operates on 128-bit lanes so we need to handle that 914 for (unsigned l = 0; l < NumElts; l += 16) { 915 for (unsigned i = 0; i != 16; ++i) { 916 unsigned Idx = ShiftVal + i; 917 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN. 918 Idx += NumElts - 16; // End of lane, switch operand. 919 Indices[l + i] = Idx + l; 920 } 921 } 922 923 Value *Align = Builder.CreateShuffleVector(Op1, Op0, 924 makeArrayRef(Indices, NumElts), 925 "palignr"); 926 927 return EmitX86Select(Builder, Mask, Align, Passthru); 928 } 929 930 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, 931 bool IsAddition) { 932 Type *Ty = CI.getType(); 933 Value *Op0 = CI.getOperand(0); 934 Value *Op1 = CI.getOperand(1); 935 936 Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat; 937 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); 938 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); 939 940 if (CI.getNumArgOperands() == 4) { // For masked intrinsics. 941 Value *VecSrc = CI.getOperand(2); 942 Value *Mask = CI.getOperand(3); 943 Res = EmitX86Select(Builder, Mask, Res, VecSrc); 944 } 945 return Res; 946 } 947 948 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, 949 bool IsRotateRight) { 950 Type *Ty = CI.getType(); 951 Value *Src = CI.getArgOperand(0); 952 Value *Amt = CI.getArgOperand(1); 953 954 // Amount may be scalar immediate, in which case create a splat vector. 955 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so 956 // we only care about the lowest log2 bits anyway. 957 if (Amt->getType() != Ty) { 958 unsigned NumElts = Ty->getVectorNumElements(); 959 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); 960 Amt = Builder.CreateVectorSplat(NumElts, Amt); 961 } 962 963 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; 964 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); 965 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt}); 966 967 if (CI.getNumArgOperands() == 4) { // For masked intrinsics. 968 Value *VecSrc = CI.getOperand(2); 969 Value *Mask = CI.getOperand(3); 970 Res = EmitX86Select(Builder, Mask, Res, VecSrc); 971 } 972 return Res; 973 } 974 975 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, 976 Value *Ptr, Value *Data, Value *Mask, 977 bool Aligned) { 978 // Cast the pointer to the right type. 979 Ptr = Builder.CreateBitCast(Ptr, 980 llvm::PointerType::getUnqual(Data->getType())); 981 unsigned Align = 982 Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1; 983 984 // If the mask is all ones just emit a regular store. 985 if (const auto *C = dyn_cast<Constant>(Mask)) 986 if (C->isAllOnesValue()) 987 return Builder.CreateAlignedStore(Data, Ptr, Align); 988 989 // Convert the mask from an integer type to a vector of i1. 990 unsigned NumElts = Data->getType()->getVectorNumElements(); 991 Mask = getX86MaskVec(Builder, Mask, NumElts); 992 return Builder.CreateMaskedStore(Data, Ptr, Align, Mask); 993 } 994 995 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, 996 Value *Ptr, Value *Passthru, Value *Mask, 997 bool Aligned) { 998 // Cast the pointer to the right type. 999 Ptr = Builder.CreateBitCast(Ptr, 1000 llvm::PointerType::getUnqual(Passthru->getType())); 1001 unsigned Align = 1002 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; 1003 1004 // If the mask is all ones just emit a regular store. 1005 if (const auto *C = dyn_cast<Constant>(Mask)) 1006 if (C->isAllOnesValue()) 1007 return Builder.CreateAlignedLoad(Ptr, Align); 1008 1009 // Convert the mask from an integer type to a vector of i1. 1010 unsigned NumElts = Passthru->getType()->getVectorNumElements(); 1011 Mask = getX86MaskVec(Builder, Mask, NumElts); 1012 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); 1013 } 1014 1015 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) { 1016 Value *Op0 = CI.getArgOperand(0); 1017 llvm::Type *Ty = Op0->getType(); 1018 Value *Zero = llvm::Constant::getNullValue(Ty); 1019 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero); 1020 Value *Neg = Builder.CreateNeg(Op0); 1021 Value *Res = Builder.CreateSelect(Cmp, Op0, Neg); 1022 1023 if (CI.getNumArgOperands() == 3) 1024 Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1)); 1025 1026 return Res; 1027 } 1028 1029 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, 1030 ICmpInst::Predicate Pred) { 1031 Value *Op0 = CI.getArgOperand(0); 1032 Value *Op1 = CI.getArgOperand(1); 1033 Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1); 1034 Value *Res = Builder.CreateSelect(Cmp, Op0, Op1); 1035 1036 if (CI.getNumArgOperands() == 4) 1037 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); 1038 1039 return Res; 1040 } 1041 1042 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { 1043 Type *Ty = CI.getType(); 1044 1045 // Arguments have a vXi32 type so cast to vXi64. 1046 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty); 1047 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty); 1048 1049 if (IsSigned) { 1050 // Shift left then arithmetic shift right. 1051 Constant *ShiftAmt = ConstantInt::get(Ty, 32); 1052 LHS = Builder.CreateShl(LHS, ShiftAmt); 1053 LHS = Builder.CreateAShr(LHS, ShiftAmt); 1054 RHS = Builder.CreateShl(RHS, ShiftAmt); 1055 RHS = Builder.CreateAShr(RHS, ShiftAmt); 1056 } else { 1057 // Clear the upper bits. 1058 Constant *Mask = ConstantInt::get(Ty, 0xffffffff); 1059 LHS = Builder.CreateAnd(LHS, Mask); 1060 RHS = Builder.CreateAnd(RHS, Mask); 1061 } 1062 1063 Value *Res = Builder.CreateMul(LHS, RHS); 1064 1065 if (CI.getNumArgOperands() == 4) 1066 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); 1067 1068 return Res; 1069 } 1070 1071 // Applying mask on vector of i1's and make sure result is at least 8 bits wide. 1072 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, 1073 Value *Mask) { 1074 unsigned NumElts = Vec->getType()->getVectorNumElements(); 1075 if (Mask) { 1076 const auto *C = dyn_cast<Constant>(Mask); 1077 if (!C || !C->isAllOnesValue()) 1078 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); 1079 } 1080 1081 if (NumElts < 8) { 1082 uint32_t Indices[8]; 1083 for (unsigned i = 0; i != NumElts; ++i) 1084 Indices[i] = i; 1085 for (unsigned i = NumElts; i != 8; ++i) 1086 Indices[i] = NumElts + i % NumElts; 1087 Vec = Builder.CreateShuffleVector(Vec, 1088 Constant::getNullValue(Vec->getType()), 1089 Indices); 1090 } 1091 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U))); 1092 } 1093 1094 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, 1095 unsigned CC, bool Signed) { 1096 Value *Op0 = CI.getArgOperand(0); 1097 unsigned NumElts = Op0->getType()->getVectorNumElements(); 1098 1099 Value *Cmp; 1100 if (CC == 3) { 1101 Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 1102 } else if (CC == 7) { 1103 Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts)); 1104 } else { 1105 ICmpInst::Predicate Pred; 1106 switch (CC) { 1107 default: llvm_unreachable("Unknown condition code"); 1108 case 0: Pred = ICmpInst::ICMP_EQ; break; 1109 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; 1110 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; 1111 case 4: Pred = ICmpInst::ICMP_NE; break; 1112 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; 1113 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; 1114 } 1115 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); 1116 } 1117 1118 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); 1119 1120 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask); 1121 } 1122 1123 // Replace a masked intrinsic with an older unmasked intrinsic. 1124 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, 1125 Intrinsic::ID IID) { 1126 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID); 1127 Value *Rep = Builder.CreateCall(Intrin, 1128 { CI.getArgOperand(0), CI.getArgOperand(1) }); 1129 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); 1130 } 1131 1132 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { 1133 Value* A = CI.getArgOperand(0); 1134 Value* B = CI.getArgOperand(1); 1135 Value* Src = CI.getArgOperand(2); 1136 Value* Mask = CI.getArgOperand(3); 1137 1138 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); 1139 Value* Cmp = Builder.CreateIsNotNull(AndNode); 1140 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); 1141 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); 1142 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); 1143 return Builder.CreateInsertElement(A, Select, (uint64_t)0); 1144 } 1145 1146 1147 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) { 1148 Value* Op = CI.getArgOperand(0); 1149 Type* ReturnOp = CI.getType(); 1150 unsigned NumElts = CI.getType()->getVectorNumElements(); 1151 Value *Mask = getX86MaskVec(Builder, Op, NumElts); 1152 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); 1153 } 1154 1155 // Replace intrinsic with unmasked version and a select. 1156 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, 1157 CallInst &CI, Value *&Rep) { 1158 Name = Name.substr(12); // Remove avx512.mask. 1159 1160 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); 1161 unsigned EltWidth = CI.getType()->getScalarSizeInBits(); 1162 Intrinsic::ID IID; 1163 if (Name.startswith("max.p")) { 1164 if (VecWidth == 128 && EltWidth == 32) 1165 IID = Intrinsic::x86_sse_max_ps; 1166 else if (VecWidth == 128 && EltWidth == 64) 1167 IID = Intrinsic::x86_sse2_max_pd; 1168 else if (VecWidth == 256 && EltWidth == 32) 1169 IID = Intrinsic::x86_avx_max_ps_256; 1170 else if (VecWidth == 256 && EltWidth == 64) 1171 IID = Intrinsic::x86_avx_max_pd_256; 1172 else 1173 llvm_unreachable("Unexpected intrinsic"); 1174 } else if (Name.startswith("min.p")) { 1175 if (VecWidth == 128 && EltWidth == 32) 1176 IID = Intrinsic::x86_sse_min_ps; 1177 else if (VecWidth == 128 && EltWidth == 64) 1178 IID = Intrinsic::x86_sse2_min_pd; 1179 else if (VecWidth == 256 && EltWidth == 32) 1180 IID = Intrinsic::x86_avx_min_ps_256; 1181 else if (VecWidth == 256 && EltWidth == 64) 1182 IID = Intrinsic::x86_avx_min_pd_256; 1183 else 1184 llvm_unreachable("Unexpected intrinsic"); 1185 } else if (Name.startswith("pshuf.b.")) { 1186 if (VecWidth == 128) 1187 IID = Intrinsic::x86_ssse3_pshuf_b_128; 1188 else if (VecWidth == 256) 1189 IID = Intrinsic::x86_avx2_pshuf_b; 1190 else if (VecWidth == 512) 1191 IID = Intrinsic::x86_avx512_pshuf_b_512; 1192 else 1193 llvm_unreachable("Unexpected intrinsic"); 1194 } else if (Name.startswith("pmul.hr.sw.")) { 1195 if (VecWidth == 128) 1196 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; 1197 else if (VecWidth == 256) 1198 IID = Intrinsic::x86_avx2_pmul_hr_sw; 1199 else if (VecWidth == 512) 1200 IID = Intrinsic::x86_avx512_pmul_hr_sw_512; 1201 else 1202 llvm_unreachable("Unexpected intrinsic"); 1203 } else if (Name.startswith("pmulh.w.")) { 1204 if (VecWidth == 128) 1205 IID = Intrinsic::x86_sse2_pmulh_w; 1206 else if (VecWidth == 256) 1207 IID = Intrinsic::x86_avx2_pmulh_w; 1208 else if (VecWidth == 512) 1209 IID = Intrinsic::x86_avx512_pmulh_w_512; 1210 else 1211 llvm_unreachable("Unexpected intrinsic"); 1212 } else if (Name.startswith("pmulhu.w.")) { 1213 if (VecWidth == 128) 1214 IID = Intrinsic::x86_sse2_pmulhu_w; 1215 else if (VecWidth == 256) 1216 IID = Intrinsic::x86_avx2_pmulhu_w; 1217 else if (VecWidth == 512) 1218 IID = Intrinsic::x86_avx512_pmulhu_w_512; 1219 else 1220 llvm_unreachable("Unexpected intrinsic"); 1221 } else if (Name.startswith("pmaddw.d.")) { 1222 if (VecWidth == 128) 1223 IID = Intrinsic::x86_sse2_pmadd_wd; 1224 else if (VecWidth == 256) 1225 IID = Intrinsic::x86_avx2_pmadd_wd; 1226 else if (VecWidth == 512) 1227 IID = Intrinsic::x86_avx512_pmaddw_d_512; 1228 else 1229 llvm_unreachable("Unexpected intrinsic"); 1230 } else if (Name.startswith("pmaddubs.w.")) { 1231 if (VecWidth == 128) 1232 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; 1233 else if (VecWidth == 256) 1234 IID = Intrinsic::x86_avx2_pmadd_ub_sw; 1235 else if (VecWidth == 512) 1236 IID = Intrinsic::x86_avx512_pmaddubs_w_512; 1237 else 1238 llvm_unreachable("Unexpected intrinsic"); 1239 } else if (Name.startswith("packsswb.")) { 1240 if (VecWidth == 128) 1241 IID = Intrinsic::x86_sse2_packsswb_128; 1242 else if (VecWidth == 256) 1243 IID = Intrinsic::x86_avx2_packsswb; 1244 else if (VecWidth == 512) 1245 IID = Intrinsic::x86_avx512_packsswb_512; 1246 else 1247 llvm_unreachable("Unexpected intrinsic"); 1248 } else if (Name.startswith("packssdw.")) { 1249 if (VecWidth == 128) 1250 IID = Intrinsic::x86_sse2_packssdw_128; 1251 else if (VecWidth == 256) 1252 IID = Intrinsic::x86_avx2_packssdw; 1253 else if (VecWidth == 512) 1254 IID = Intrinsic::x86_avx512_packssdw_512; 1255 else 1256 llvm_unreachable("Unexpected intrinsic"); 1257 } else if (Name.startswith("packuswb.")) { 1258 if (VecWidth == 128) 1259 IID = Intrinsic::x86_sse2_packuswb_128; 1260 else if (VecWidth == 256) 1261 IID = Intrinsic::x86_avx2_packuswb; 1262 else if (VecWidth == 512) 1263 IID = Intrinsic::x86_avx512_packuswb_512; 1264 else 1265 llvm_unreachable("Unexpected intrinsic"); 1266 } else if (Name.startswith("packusdw.")) { 1267 if (VecWidth == 128) 1268 IID = Intrinsic::x86_sse41_packusdw; 1269 else if (VecWidth == 256) 1270 IID = Intrinsic::x86_avx2_packusdw; 1271 else if (VecWidth == 512) 1272 IID = Intrinsic::x86_avx512_packusdw_512; 1273 else 1274 llvm_unreachable("Unexpected intrinsic"); 1275 } else if (Name.startswith("vpermilvar.")) { 1276 if (VecWidth == 128 && EltWidth == 32) 1277 IID = Intrinsic::x86_avx_vpermilvar_ps; 1278 else if (VecWidth == 128 && EltWidth == 64) 1279 IID = Intrinsic::x86_avx_vpermilvar_pd; 1280 else if (VecWidth == 256 && EltWidth == 32) 1281 IID = Intrinsic::x86_avx_vpermilvar_ps_256; 1282 else if (VecWidth == 256 && EltWidth == 64) 1283 IID = Intrinsic::x86_avx_vpermilvar_pd_256; 1284 else if (VecWidth == 512 && EltWidth == 32) 1285 IID = Intrinsic::x86_avx512_vpermilvar_ps_512; 1286 else if (VecWidth == 512 && EltWidth == 64) 1287 IID = Intrinsic::x86_avx512_vpermilvar_pd_512; 1288 else 1289 llvm_unreachable("Unexpected intrinsic"); 1290 } else if (Name == "cvtpd2dq.256") { 1291 IID = Intrinsic::x86_avx_cvt_pd2dq_256; 1292 } else if (Name == "cvtpd2ps.256") { 1293 IID = Intrinsic::x86_avx_cvt_pd2_ps_256; 1294 } else if (Name == "cvttpd2dq.256") { 1295 IID = Intrinsic::x86_avx_cvtt_pd2dq_256; 1296 } else if (Name == "cvttps2dq.128") { 1297 IID = Intrinsic::x86_sse2_cvttps2dq; 1298 } else if (Name == "cvttps2dq.256") { 1299 IID = Intrinsic::x86_avx_cvtt_ps2dq_256; 1300 } else if (Name.startswith("permvar.")) { 1301 bool IsFloat = CI.getType()->isFPOrFPVectorTy(); 1302 if (VecWidth == 256 && EltWidth == 32 && IsFloat) 1303 IID = Intrinsic::x86_avx2_permps; 1304 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) 1305 IID = Intrinsic::x86_avx2_permd; 1306 else if (VecWidth == 256 && EltWidth == 64 && IsFloat) 1307 IID = Intrinsic::x86_avx512_permvar_df_256; 1308 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) 1309 IID = Intrinsic::x86_avx512_permvar_di_256; 1310 else if (VecWidth == 512 && EltWidth == 32 && IsFloat) 1311 IID = Intrinsic::x86_avx512_permvar_sf_512; 1312 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) 1313 IID = Intrinsic::x86_avx512_permvar_si_512; 1314 else if (VecWidth == 512 && EltWidth == 64 && IsFloat) 1315 IID = Intrinsic::x86_avx512_permvar_df_512; 1316 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) 1317 IID = Intrinsic::x86_avx512_permvar_di_512; 1318 else if (VecWidth == 128 && EltWidth == 16) 1319 IID = Intrinsic::x86_avx512_permvar_hi_128; 1320 else if (VecWidth == 256 && EltWidth == 16) 1321 IID = Intrinsic::x86_avx512_permvar_hi_256; 1322 else if (VecWidth == 512 && EltWidth == 16) 1323 IID = Intrinsic::x86_avx512_permvar_hi_512; 1324 else if (VecWidth == 128 && EltWidth == 8) 1325 IID = Intrinsic::x86_avx512_permvar_qi_128; 1326 else if (VecWidth == 256 && EltWidth == 8) 1327 IID = Intrinsic::x86_avx512_permvar_qi_256; 1328 else if (VecWidth == 512 && EltWidth == 8) 1329 IID = Intrinsic::x86_avx512_permvar_qi_512; 1330 else 1331 llvm_unreachable("Unexpected intrinsic"); 1332 } else if (Name.startswith("dbpsadbw.")) { 1333 if (VecWidth == 128) 1334 IID = Intrinsic::x86_avx512_dbpsadbw_128; 1335 else if (VecWidth == 256) 1336 IID = Intrinsic::x86_avx512_dbpsadbw_256; 1337 else if (VecWidth == 512) 1338 IID = Intrinsic::x86_avx512_dbpsadbw_512; 1339 else 1340 llvm_unreachable("Unexpected intrinsic"); 1341 } else if (Name.startswith("vpshld.")) { 1342 if (VecWidth == 128 && Name[7] == 'q') 1343 IID = Intrinsic::x86_avx512_vpshld_q_128; 1344 else if (VecWidth == 128 && Name[7] == 'd') 1345 IID = Intrinsic::x86_avx512_vpshld_d_128; 1346 else if (VecWidth == 128 && Name[7] == 'w') 1347 IID = Intrinsic::x86_avx512_vpshld_w_128; 1348 else if (VecWidth == 256 && Name[7] == 'q') 1349 IID = Intrinsic::x86_avx512_vpshld_q_256; 1350 else if (VecWidth == 256 && Name[7] == 'd') 1351 IID = Intrinsic::x86_avx512_vpshld_d_256; 1352 else if (VecWidth == 256 && Name[7] == 'w') 1353 IID = Intrinsic::x86_avx512_vpshld_w_256; 1354 else if (VecWidth == 512 && Name[7] == 'q') 1355 IID = Intrinsic::x86_avx512_vpshld_q_512; 1356 else if (VecWidth == 512 && Name[7] == 'd') 1357 IID = Intrinsic::x86_avx512_vpshld_d_512; 1358 else if (VecWidth == 512 && Name[7] == 'w') 1359 IID = Intrinsic::x86_avx512_vpshld_w_512; 1360 else 1361 llvm_unreachable("Unexpected intrinsic"); 1362 } else if (Name.startswith("vpshrd.")) { 1363 if (VecWidth == 128 && Name[7] == 'q') 1364 IID = Intrinsic::x86_avx512_vpshrd_q_128; 1365 else if (VecWidth == 128 && Name[7] == 'd') 1366 IID = Intrinsic::x86_avx512_vpshrd_d_128; 1367 else if (VecWidth == 128 && Name[7] == 'w') 1368 IID = Intrinsic::x86_avx512_vpshrd_w_128; 1369 else if (VecWidth == 256 && Name[7] == 'q') 1370 IID = Intrinsic::x86_avx512_vpshrd_q_256; 1371 else if (VecWidth == 256 && Name[7] == 'd') 1372 IID = Intrinsic::x86_avx512_vpshrd_d_256; 1373 else if (VecWidth == 256 && Name[7] == 'w') 1374 IID = Intrinsic::x86_avx512_vpshrd_w_256; 1375 else if (VecWidth == 512 && Name[7] == 'q') 1376 IID = Intrinsic::x86_avx512_vpshrd_q_512; 1377 else if (VecWidth == 512 && Name[7] == 'd') 1378 IID = Intrinsic::x86_avx512_vpshrd_d_512; 1379 else if (VecWidth == 512 && Name[7] == 'w') 1380 IID = Intrinsic::x86_avx512_vpshrd_w_512; 1381 else 1382 llvm_unreachable("Unexpected intrinsic"); 1383 } else if (Name.startswith("padds.")) { 1384 if (VecWidth == 128 && EltWidth == 8) 1385 IID = Intrinsic::x86_sse2_padds_b; 1386 else if (VecWidth == 256 && EltWidth == 8) 1387 IID = Intrinsic::x86_avx2_padds_b; 1388 else if (VecWidth == 512 && EltWidth == 8) 1389 IID = Intrinsic::x86_avx512_padds_b_512; 1390 else if (VecWidth == 128 && EltWidth == 16) 1391 IID = Intrinsic::x86_sse2_padds_w; 1392 else if (VecWidth == 256 && EltWidth == 16) 1393 IID = Intrinsic::x86_avx2_padds_w; 1394 else if (VecWidth == 512 && EltWidth == 16) 1395 IID = Intrinsic::x86_avx512_padds_w_512; 1396 else 1397 llvm_unreachable("Unexpected intrinsic"); 1398 } else if (Name.startswith("psubs.")) { 1399 if (VecWidth == 128 && EltWidth == 8) 1400 IID = Intrinsic::x86_sse2_psubs_b; 1401 else if (VecWidth == 256 && EltWidth == 8) 1402 IID = Intrinsic::x86_avx2_psubs_b; 1403 else if (VecWidth == 512 && EltWidth == 8) 1404 IID = Intrinsic::x86_avx512_psubs_b_512; 1405 else if (VecWidth == 128 && EltWidth == 16) 1406 IID = Intrinsic::x86_sse2_psubs_w; 1407 else if (VecWidth == 256 && EltWidth == 16) 1408 IID = Intrinsic::x86_avx2_psubs_w; 1409 else if (VecWidth == 512 && EltWidth == 16) 1410 IID = Intrinsic::x86_avx512_psubs_w_512; 1411 else 1412 llvm_unreachable("Unexpected intrinsic"); 1413 } else 1414 return false; 1415 1416 SmallVector<Value *, 4> Args(CI.arg_operands().begin(), 1417 CI.arg_operands().end()); 1418 Args.pop_back(); 1419 Args.pop_back(); 1420 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), 1421 Args); 1422 unsigned NumArgs = CI.getNumArgOperands(); 1423 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep, 1424 CI.getArgOperand(NumArgs - 2)); 1425 return true; 1426 } 1427 1428 /// Upgrade comment in call to inline asm that represents an objc retain release 1429 /// marker. 1430 void llvm::UpgradeInlineAsmString(std::string *AsmStr) { 1431 size_t Pos; 1432 if (AsmStr->find("mov\tfp") == 0 && 1433 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos && 1434 (Pos = AsmStr->find("# marker")) != std::string::npos) { 1435 AsmStr->replace(Pos, 1, ";"); 1436 } 1437 return; 1438 } 1439 1440 /// Upgrade a call to an old intrinsic. All argument and return casting must be 1441 /// provided to seamlessly integrate with existing context. 1442 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 1443 Function *F = CI->getCalledFunction(); 1444 LLVMContext &C = CI->getContext(); 1445 IRBuilder<> Builder(C); 1446 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 1447 1448 assert(F && "Intrinsic call is not direct?"); 1449 1450 if (!NewFn) { 1451 // Get the Function's name. 1452 StringRef Name = F->getName(); 1453 1454 assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'"); 1455 Name = Name.substr(5); 1456 1457 bool IsX86 = Name.startswith("x86."); 1458 if (IsX86) 1459 Name = Name.substr(4); 1460 bool IsNVVM = Name.startswith("nvvm."); 1461 if (IsNVVM) 1462 Name = Name.substr(5); 1463 1464 if (IsX86 && Name.startswith("sse4a.movnt.")) { 1465 Module *M = F->getParent(); 1466 SmallVector<Metadata *, 1> Elts; 1467 Elts.push_back( 1468 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 1469 MDNode *Node = MDNode::get(C, Elts); 1470 1471 Value *Arg0 = CI->getArgOperand(0); 1472 Value *Arg1 = CI->getArgOperand(1); 1473 1474 // Nontemporal (unaligned) store of the 0'th element of the float/double 1475 // vector. 1476 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType(); 1477 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy); 1478 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast"); 1479 Value *Extract = 1480 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement"); 1481 1482 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1); 1483 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 1484 1485 // Remove intrinsic. 1486 CI->eraseFromParent(); 1487 return; 1488 } 1489 1490 if (IsX86 && (Name.startswith("avx.movnt.") || 1491 Name.startswith("avx512.storent."))) { 1492 Module *M = F->getParent(); 1493 SmallVector<Metadata *, 1> Elts; 1494 Elts.push_back( 1495 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 1496 MDNode *Node = MDNode::get(C, Elts); 1497 1498 Value *Arg0 = CI->getArgOperand(0); 1499 Value *Arg1 = CI->getArgOperand(1); 1500 1501 // Convert the type of the pointer to a pointer to the stored type. 1502 Value *BC = Builder.CreateBitCast(Arg0, 1503 PointerType::getUnqual(Arg1->getType()), 1504 "cast"); 1505 VectorType *VTy = cast<VectorType>(Arg1->getType()); 1506 StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC, 1507 VTy->getBitWidth() / 8); 1508 SI->setMetadata(M->getMDKindID("nontemporal"), Node); 1509 1510 // Remove intrinsic. 1511 CI->eraseFromParent(); 1512 return; 1513 } 1514 1515 if (IsX86 && Name == "sse2.storel.dq") { 1516 Value *Arg0 = CI->getArgOperand(0); 1517 Value *Arg1 = CI->getArgOperand(1); 1518 1519 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 1520 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 1521 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0); 1522 Value *BC = Builder.CreateBitCast(Arg0, 1523 PointerType::getUnqual(Elt->getType()), 1524 "cast"); 1525 Builder.CreateAlignedStore(Elt, BC, 1); 1526 1527 // Remove intrinsic. 1528 CI->eraseFromParent(); 1529 return; 1530 } 1531 1532 if (IsX86 && (Name.startswith("sse.storeu.") || 1533 Name.startswith("sse2.storeu.") || 1534 Name.startswith("avx.storeu."))) { 1535 Value *Arg0 = CI->getArgOperand(0); 1536 Value *Arg1 = CI->getArgOperand(1); 1537 1538 Arg0 = Builder.CreateBitCast(Arg0, 1539 PointerType::getUnqual(Arg1->getType()), 1540 "cast"); 1541 Builder.CreateAlignedStore(Arg1, Arg0, 1); 1542 1543 // Remove intrinsic. 1544 CI->eraseFromParent(); 1545 return; 1546 } 1547 1548 if (IsX86 && Name == "avx512.mask.store.ss") { 1549 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1)); 1550 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1551 Mask, false); 1552 1553 // Remove intrinsic. 1554 CI->eraseFromParent(); 1555 return; 1556 } 1557 1558 if (IsX86 && (Name.startswith("avx512.mask.store"))) { 1559 // "avx512.mask.storeu." or "avx512.mask.store." 1560 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu". 1561 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 1562 CI->getArgOperand(2), Aligned); 1563 1564 // Remove intrinsic. 1565 CI->eraseFromParent(); 1566 return; 1567 } 1568 1569 Value *Rep; 1570 // Upgrade packed integer vector compare intrinsics to compare instructions. 1571 if (IsX86 && (Name.startswith("sse2.pcmp") || 1572 Name.startswith("avx2.pcmp"))) { 1573 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt." 1574 bool CmpEq = Name[9] == 'e'; 1575 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT, 1576 CI->getArgOperand(0), CI->getArgOperand(1)); 1577 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); 1578 } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) { 1579 Type *ExtTy = Type::getInt32Ty(C); 1580 if (CI->getOperand(0)->getType()->isIntegerTy(8)) 1581 ExtTy = Type::getInt64Ty(C); 1582 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 1583 ExtTy->getPrimitiveSizeInBits(); 1584 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); 1585 Rep = Builder.CreateVectorSplat(NumElts, Rep); 1586 } else if (IsX86 && (Name == "sse.sqrt.ss" || 1587 Name == "sse2.sqrt.sd")) { 1588 Value *Vec = CI->getArgOperand(0); 1589 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0); 1590 Function *Intr = Intrinsic::getDeclaration(F->getParent(), 1591 Intrinsic::sqrt, Elt0->getType()); 1592 Elt0 = Builder.CreateCall(Intr, Elt0); 1593 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0); 1594 } else if (IsX86 && (Name.startswith("avx.sqrt.p") || 1595 Name.startswith("sse2.sqrt.p") || 1596 Name.startswith("sse.sqrt.p"))) { 1597 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1598 Intrinsic::sqrt, 1599 CI->getType()), 1600 {CI->getArgOperand(0)}); 1601 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) { 1602 if (CI->getNumArgOperands() == 4 && 1603 (!isa<ConstantInt>(CI->getArgOperand(3)) || 1604 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { 1605 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512 1606 : Intrinsic::x86_avx512_sqrt_pd_512; 1607 1608 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) }; 1609 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 1610 IID), Args); 1611 } else { 1612 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 1613 Intrinsic::sqrt, 1614 CI->getType()), 1615 {CI->getArgOperand(0)}); 1616 } 1617 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1618 CI->getArgOperand(1)); 1619 } else if (IsX86 && (Name.startswith("avx512.ptestm") || 1620 Name.startswith("avx512.ptestnm"))) { 1621 Value *Op0 = CI->getArgOperand(0); 1622 Value *Op1 = CI->getArgOperand(1); 1623 Value *Mask = CI->getArgOperand(2); 1624 Rep = Builder.CreateAnd(Op0, Op1); 1625 llvm::Type *Ty = Op0->getType(); 1626 Value *Zero = llvm::Constant::getNullValue(Ty); 1627 ICmpInst::Predicate Pred = 1628 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1629 Rep = Builder.CreateICmp(Pred, Rep, Zero); 1630 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask); 1631 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ 1632 unsigned NumElts = 1633 CI->getArgOperand(1)->getType()->getVectorNumElements(); 1634 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); 1635 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1636 CI->getArgOperand(1)); 1637 } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { 1638 unsigned NumElts = CI->getType()->getScalarSizeInBits(); 1639 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); 1640 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); 1641 uint32_t Indices[64]; 1642 for (unsigned i = 0; i != NumElts; ++i) 1643 Indices[i] = i; 1644 1645 // First extract half of each vector. This gives better codegen than 1646 // doing it in a single shuffle. 1647 LHS = Builder.CreateShuffleVector(LHS, LHS, 1648 makeArrayRef(Indices, NumElts / 2)); 1649 RHS = Builder.CreateShuffleVector(RHS, RHS, 1650 makeArrayRef(Indices, NumElts / 2)); 1651 // Concat the vectors. 1652 // NOTE: Operands have to be swapped to match intrinsic definition. 1653 Rep = Builder.CreateShuffleVector(RHS, LHS, 1654 makeArrayRef(Indices, NumElts)); 1655 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1656 } else if (IsX86 && Name == "avx512.kand.w") { 1657 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1658 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1659 Rep = Builder.CreateAnd(LHS, RHS); 1660 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1661 } else if (IsX86 && Name == "avx512.kandn.w") { 1662 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1663 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1664 LHS = Builder.CreateNot(LHS); 1665 Rep = Builder.CreateAnd(LHS, RHS); 1666 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1667 } else if (IsX86 && Name == "avx512.kor.w") { 1668 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1669 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1670 Rep = Builder.CreateOr(LHS, RHS); 1671 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1672 } else if (IsX86 && Name == "avx512.kxor.w") { 1673 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1674 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1675 Rep = Builder.CreateXor(LHS, RHS); 1676 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1677 } else if (IsX86 && Name == "avx512.kxnor.w") { 1678 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1679 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1680 LHS = Builder.CreateNot(LHS); 1681 Rep = Builder.CreateXor(LHS, RHS); 1682 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1683 } else if (IsX86 && Name == "avx512.knot.w") { 1684 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1685 Rep = Builder.CreateNot(Rep); 1686 Rep = Builder.CreateBitCast(Rep, CI->getType()); 1687 } else if (IsX86 && 1688 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) { 1689 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16); 1690 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16); 1691 Rep = Builder.CreateOr(LHS, RHS); 1692 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty()); 1693 Value *C; 1694 if (Name[14] == 'c') 1695 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); 1696 else 1697 C = ConstantInt::getNullValue(Builder.getInt16Ty()); 1698 Rep = Builder.CreateICmpEQ(Rep, C); 1699 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); 1700 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" || 1701 Name == "sse.sub.ss" || Name == "sse2.sub.sd" || 1702 Name == "sse.mul.ss" || Name == "sse2.mul.sd" || 1703 Name == "sse.div.ss" || Name == "sse2.div.sd")) { 1704 Type *I32Ty = Type::getInt32Ty(C); 1705 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1706 ConstantInt::get(I32Ty, 0)); 1707 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1708 ConstantInt::get(I32Ty, 0)); 1709 Value *EltOp; 1710 if (Name.contains(".add.")) 1711 EltOp = Builder.CreateFAdd(Elt0, Elt1); 1712 else if (Name.contains(".sub.")) 1713 EltOp = Builder.CreateFSub(Elt0, Elt1); 1714 else if (Name.contains(".mul.")) 1715 EltOp = Builder.CreateFMul(Elt0, Elt1); 1716 else 1717 EltOp = Builder.CreateFDiv(Elt0, Elt1); 1718 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp, 1719 ConstantInt::get(I32Ty, 0)); 1720 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { 1721 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." 1722 bool CmpEq = Name[16] == 'e'; 1723 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); 1724 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) { 1725 Type *OpTy = CI->getArgOperand(0)->getType(); 1726 unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); 1727 unsigned EltWidth = OpTy->getScalarSizeInBits(); 1728 Intrinsic::ID IID; 1729 if (VecWidth == 128 && EltWidth == 32) 1730 IID = Intrinsic::x86_avx512_fpclass_ps_128; 1731 else if (VecWidth == 256 && EltWidth == 32) 1732 IID = Intrinsic::x86_avx512_fpclass_ps_256; 1733 else if (VecWidth == 512 && EltWidth == 32) 1734 IID = Intrinsic::x86_avx512_fpclass_ps_512; 1735 else if (VecWidth == 128 && EltWidth == 64) 1736 IID = Intrinsic::x86_avx512_fpclass_pd_128; 1737 else if (VecWidth == 256 && EltWidth == 64) 1738 IID = Intrinsic::x86_avx512_fpclass_pd_256; 1739 else if (VecWidth == 512 && EltWidth == 64) 1740 IID = Intrinsic::x86_avx512_fpclass_pd_512; 1741 else 1742 llvm_unreachable("Unexpected intrinsic"); 1743 1744 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1745 { CI->getOperand(0), CI->getArgOperand(1) }); 1746 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); 1747 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) { 1748 Type *OpTy = CI->getArgOperand(0)->getType(); 1749 unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); 1750 unsigned EltWidth = OpTy->getScalarSizeInBits(); 1751 Intrinsic::ID IID; 1752 if (VecWidth == 128 && EltWidth == 32) 1753 IID = Intrinsic::x86_avx512_cmp_ps_128; 1754 else if (VecWidth == 256 && EltWidth == 32) 1755 IID = Intrinsic::x86_avx512_cmp_ps_256; 1756 else if (VecWidth == 512 && EltWidth == 32) 1757 IID = Intrinsic::x86_avx512_cmp_ps_512; 1758 else if (VecWidth == 128 && EltWidth == 64) 1759 IID = Intrinsic::x86_avx512_cmp_pd_128; 1760 else if (VecWidth == 256 && EltWidth == 64) 1761 IID = Intrinsic::x86_avx512_cmp_pd_256; 1762 else if (VecWidth == 512 && EltWidth == 64) 1763 IID = Intrinsic::x86_avx512_cmp_pd_512; 1764 else 1765 llvm_unreachable("Unexpected intrinsic"); 1766 1767 SmallVector<Value *, 4> Args; 1768 Args.push_back(CI->getArgOperand(0)); 1769 Args.push_back(CI->getArgOperand(1)); 1770 Args.push_back(CI->getArgOperand(2)); 1771 if (CI->getNumArgOperands() == 5) 1772 Args.push_back(CI->getArgOperand(4)); 1773 1774 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 1775 Args); 1776 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3)); 1777 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") && 1778 Name[16] != 'p') { 1779 // Integer compare intrinsics. 1780 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1781 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); 1782 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) { 1783 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1784 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); 1785 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || 1786 Name.startswith("avx512.cvtw2mask.") || 1787 Name.startswith("avx512.cvtd2mask.") || 1788 Name.startswith("avx512.cvtq2mask."))) { 1789 Value *Op = CI->getArgOperand(0); 1790 Value *Zero = llvm::Constant::getNullValue(Op->getType()); 1791 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); 1792 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr); 1793 } else if(IsX86 && (Name == "ssse3.pabs.b.128" || 1794 Name == "ssse3.pabs.w.128" || 1795 Name == "ssse3.pabs.d.128" || 1796 Name.startswith("avx2.pabs") || 1797 Name.startswith("avx512.mask.pabs"))) { 1798 Rep = upgradeAbs(Builder, *CI); 1799 } else if (IsX86 && (Name == "sse41.pmaxsb" || 1800 Name == "sse2.pmaxs.w" || 1801 Name == "sse41.pmaxsd" || 1802 Name.startswith("avx2.pmaxs") || 1803 Name.startswith("avx512.mask.pmaxs"))) { 1804 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT); 1805 } else if (IsX86 && (Name == "sse2.pmaxu.b" || 1806 Name == "sse41.pmaxuw" || 1807 Name == "sse41.pmaxud" || 1808 Name.startswith("avx2.pmaxu") || 1809 Name.startswith("avx512.mask.pmaxu"))) { 1810 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT); 1811 } else if (IsX86 && (Name == "sse41.pminsb" || 1812 Name == "sse2.pmins.w" || 1813 Name == "sse41.pminsd" || 1814 Name.startswith("avx2.pmins") || 1815 Name.startswith("avx512.mask.pmins"))) { 1816 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT); 1817 } else if (IsX86 && (Name == "sse2.pminu.b" || 1818 Name == "sse41.pminuw" || 1819 Name == "sse41.pminud" || 1820 Name.startswith("avx2.pminu") || 1821 Name.startswith("avx512.mask.pminu"))) { 1822 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); 1823 } else if (IsX86 && (Name == "sse2.pmulu.dq" || 1824 Name == "avx2.pmulu.dq" || 1825 Name == "avx512.pmulu.dq.512" || 1826 Name.startswith("avx512.mask.pmulu.dq."))) { 1827 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); 1828 } else if (IsX86 && (Name == "sse41.pmuldq" || 1829 Name == "avx2.pmul.dq" || 1830 Name == "avx512.pmul.dq.512" || 1831 Name.startswith("avx512.mask.pmul.dq."))) { 1832 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); 1833 } else if (IsX86 && (Name == "sse.cvtsi2ss" || 1834 Name == "sse2.cvtsi2sd" || 1835 Name == "sse.cvtsi642ss" || 1836 Name == "sse2.cvtsi642sd")) { 1837 Rep = Builder.CreateSIToFP(CI->getArgOperand(1), 1838 CI->getType()->getVectorElementType()); 1839 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 1840 } else if (IsX86 && Name == "avx512.cvtusi2sd") { 1841 Rep = Builder.CreateUIToFP(CI->getArgOperand(1), 1842 CI->getType()->getVectorElementType()); 1843 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 1844 } else if (IsX86 && Name == "sse2.cvtss2sd") { 1845 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0); 1846 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType()); 1847 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 1848 } else if (IsX86 && (Name == "sse2.cvtdq2pd" || 1849 Name == "sse2.cvtdq2ps" || 1850 Name == "avx.cvtdq2.pd.256" || 1851 Name == "avx.cvtdq2.ps.256" || 1852 Name.startswith("avx512.mask.cvtdq2pd.") || 1853 Name.startswith("avx512.mask.cvtudq2pd.") || 1854 Name == "avx512.mask.cvtdq2ps.128" || 1855 Name == "avx512.mask.cvtdq2ps.256" || 1856 Name == "avx512.mask.cvtudq2ps.128" || 1857 Name == "avx512.mask.cvtudq2ps.256" || 1858 Name == "avx512.mask.cvtqq2pd.128" || 1859 Name == "avx512.mask.cvtqq2pd.256" || 1860 Name == "avx512.mask.cvtuqq2pd.128" || 1861 Name == "avx512.mask.cvtuqq2pd.256" || 1862 Name == "sse2.cvtps2pd" || 1863 Name == "avx.cvt.ps2.pd.256" || 1864 Name == "avx512.mask.cvtps2pd.128" || 1865 Name == "avx512.mask.cvtps2pd.256")) { 1866 Type *DstTy = CI->getType(); 1867 Rep = CI->getArgOperand(0); 1868 1869 unsigned NumDstElts = DstTy->getVectorNumElements(); 1870 if (NumDstElts < Rep->getType()->getVectorNumElements()) { 1871 assert(NumDstElts == 2 && "Unexpected vector size"); 1872 uint32_t ShuffleMask[2] = { 0, 1 }; 1873 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); 1874 } 1875 1876 bool IsPS2PD = (StringRef::npos != Name.find("ps2")); 1877 bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); 1878 if (IsPS2PD) 1879 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 1880 else if (IsUnsigned) 1881 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt"); 1882 else 1883 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt"); 1884 1885 if (CI->getNumArgOperands() == 3) 1886 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1887 CI->getArgOperand(1)); 1888 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { 1889 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 1890 CI->getArgOperand(1), CI->getArgOperand(2), 1891 /*Aligned*/false); 1892 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { 1893 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 1894 CI->getArgOperand(1),CI->getArgOperand(2), 1895 /*Aligned*/true); 1896 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) { 1897 Type *ResultTy = CI->getType(); 1898 Type *PtrTy = ResultTy->getVectorElementType(); 1899 1900 // Cast the pointer to element type. 1901 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), 1902 llvm::PointerType::getUnqual(PtrTy)); 1903 1904 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 1905 ResultTy->getVectorNumElements()); 1906 1907 Function *ELd = Intrinsic::getDeclaration(F->getParent(), 1908 Intrinsic::masked_expandload, 1909 ResultTy); 1910 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) }); 1911 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) { 1912 Type *ResultTy = CI->getArgOperand(1)->getType(); 1913 Type *PtrTy = ResultTy->getVectorElementType(); 1914 1915 // Cast the pointer to element type. 1916 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), 1917 llvm::PointerType::getUnqual(PtrTy)); 1918 1919 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 1920 ResultTy->getVectorNumElements()); 1921 1922 Function *CSt = Intrinsic::getDeclaration(F->getParent(), 1923 Intrinsic::masked_compressstore, 1924 ResultTy); 1925 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); 1926 } else if (IsX86 && Name.startswith("xop.vpcom")) { 1927 Intrinsic::ID intID; 1928 if (Name.endswith("ub")) 1929 intID = Intrinsic::x86_xop_vpcomub; 1930 else if (Name.endswith("uw")) 1931 intID = Intrinsic::x86_xop_vpcomuw; 1932 else if (Name.endswith("ud")) 1933 intID = Intrinsic::x86_xop_vpcomud; 1934 else if (Name.endswith("uq")) 1935 intID = Intrinsic::x86_xop_vpcomuq; 1936 else if (Name.endswith("b")) 1937 intID = Intrinsic::x86_xop_vpcomb; 1938 else if (Name.endswith("w")) 1939 intID = Intrinsic::x86_xop_vpcomw; 1940 else if (Name.endswith("d")) 1941 intID = Intrinsic::x86_xop_vpcomd; 1942 else if (Name.endswith("q")) 1943 intID = Intrinsic::x86_xop_vpcomq; 1944 else 1945 llvm_unreachable("Unknown suffix"); 1946 1947 Name = Name.substr(9); // strip off "xop.vpcom" 1948 unsigned Imm; 1949 if (Name.startswith("lt")) 1950 Imm = 0; 1951 else if (Name.startswith("le")) 1952 Imm = 1; 1953 else if (Name.startswith("gt")) 1954 Imm = 2; 1955 else if (Name.startswith("ge")) 1956 Imm = 3; 1957 else if (Name.startswith("eq")) 1958 Imm = 4; 1959 else if (Name.startswith("ne")) 1960 Imm = 5; 1961 else if (Name.startswith("false")) 1962 Imm = 6; 1963 else if (Name.startswith("true")) 1964 Imm = 7; 1965 else 1966 llvm_unreachable("Unknown condition"); 1967 1968 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 1969 Rep = 1970 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 1971 Builder.getInt8(Imm)}); 1972 } else if (IsX86 && Name.startswith("xop.vpcmov")) { 1973 Value *Sel = CI->getArgOperand(2); 1974 Value *NotSel = Builder.CreateNot(Sel); 1975 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); 1976 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); 1977 Rep = Builder.CreateOr(Sel0, Sel1); 1978 } else if (IsX86 && (Name.startswith("xop.vprot") || 1979 Name.startswith("avx512.prol") || 1980 Name.startswith("avx512.mask.prol"))) { 1981 Rep = upgradeX86Rotate(Builder, *CI, false); 1982 } else if (IsX86 && (Name.startswith("avx512.pror") || 1983 Name.startswith("avx512.mask.pror"))) { 1984 Rep = upgradeX86Rotate(Builder, *CI, true); 1985 } else if (IsX86 && Name == "sse42.crc32.64.8") { 1986 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 1987 Intrinsic::x86_sse42_crc32_32_8); 1988 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 1989 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 1990 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 1991 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") || 1992 Name.startswith("avx512.vbroadcast.s"))) { 1993 // Replace broadcasts with a series of insertelements. 1994 Type *VecTy = CI->getType(); 1995 Type *EltTy = VecTy->getVectorElementType(); 1996 unsigned EltNum = VecTy->getVectorNumElements(); 1997 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 1998 EltTy->getPointerTo()); 1999 Value *Load = Builder.CreateLoad(EltTy, Cast); 2000 Type *I32Ty = Type::getInt32Ty(C); 2001 Rep = UndefValue::get(VecTy); 2002 for (unsigned I = 0; I < EltNum; ++I) 2003 Rep = Builder.CreateInsertElement(Rep, Load, 2004 ConstantInt::get(I32Ty, I)); 2005 } else if (IsX86 && (Name.startswith("sse41.pmovsx") || 2006 Name.startswith("sse41.pmovzx") || 2007 Name.startswith("avx2.pmovsx") || 2008 Name.startswith("avx2.pmovzx") || 2009 Name.startswith("avx512.mask.pmovsx") || 2010 Name.startswith("avx512.mask.pmovzx"))) { 2011 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType()); 2012 VectorType *DstTy = cast<VectorType>(CI->getType()); 2013 unsigned NumDstElts = DstTy->getNumElements(); 2014 2015 // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 2016 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 2017 for (unsigned i = 0; i != NumDstElts; ++i) 2018 ShuffleMask[i] = i; 2019 2020 Value *SV = Builder.CreateShuffleVector( 2021 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask); 2022 2023 bool DoSext = (StringRef::npos != Name.find("pmovsx")); 2024 Rep = DoSext ? Builder.CreateSExt(SV, DstTy) 2025 : Builder.CreateZExt(SV, DstTy); 2026 // If there are 3 arguments, it's a masked intrinsic so we need a select. 2027 if (CI->getNumArgOperands() == 3) 2028 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2029 CI->getArgOperand(1)); 2030 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || 2031 Name == "avx2.vbroadcasti128")) { 2032 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 2033 Type *EltTy = CI->getType()->getVectorElementType(); 2034 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); 2035 Type *VT = VectorType::get(EltTy, NumSrcElts); 2036 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 2037 PointerType::getUnqual(VT)); 2038 Value *Load = Builder.CreateAlignedLoad(Op, 1); 2039 if (NumSrcElts == 2) 2040 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2041 { 0, 1, 0, 1 }); 2042 else 2043 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2044 { 0, 1, 2, 3, 0, 1, 2, 3 }); 2045 } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") || 2046 Name.startswith("avx512.mask.shuf.f"))) { 2047 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2048 Type *VT = CI->getType(); 2049 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; 2050 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); 2051 unsigned ControlBitsMask = NumLanes - 1; 2052 unsigned NumControlBits = NumLanes / 2; 2053 SmallVector<uint32_t, 8> ShuffleMask(0); 2054 2055 for (unsigned l = 0; l != NumLanes; ++l) { 2056 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; 2057 // We actually need the other source. 2058 if (l >= NumLanes / 2) 2059 LaneMask += NumLanes; 2060 for (unsigned i = 0; i != NumElementsInLane; ++i) 2061 ShuffleMask.push_back(LaneMask * NumElementsInLane + i); 2062 } 2063 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2064 CI->getArgOperand(1), ShuffleMask); 2065 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2066 CI->getArgOperand(3)); 2067 }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") || 2068 Name.startswith("avx512.mask.broadcasti"))) { 2069 unsigned NumSrcElts = 2070 CI->getArgOperand(0)->getType()->getVectorNumElements(); 2071 unsigned NumDstElts = CI->getType()->getVectorNumElements(); 2072 2073 SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); 2074 for (unsigned i = 0; i != NumDstElts; ++i) 2075 ShuffleMask[i] = i % NumSrcElts; 2076 2077 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2078 CI->getArgOperand(0), 2079 ShuffleMask); 2080 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2081 CI->getArgOperand(1)); 2082 } else if (IsX86 && (Name.startswith("avx2.pbroadcast") || 2083 Name.startswith("avx2.vbroadcast") || 2084 Name.startswith("avx512.pbroadcast") || 2085 Name.startswith("avx512.mask.broadcast.s"))) { 2086 // Replace vp?broadcasts with a vector shuffle. 2087 Value *Op = CI->getArgOperand(0); 2088 unsigned NumElts = CI->getType()->getVectorNumElements(); 2089 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts); 2090 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()), 2091 Constant::getNullValue(MaskTy)); 2092 2093 if (CI->getNumArgOperands() == 3) 2094 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2095 CI->getArgOperand(1)); 2096 } else if (IsX86 && (Name.startswith("sse2.paddus.") || 2097 Name.startswith("sse2.psubus.") || 2098 Name.startswith("avx2.paddus.") || 2099 Name.startswith("avx2.psubus.") || 2100 Name.startswith("avx512.mask.paddus.") || 2101 Name.startswith("avx512.mask.psubus."))) { 2102 bool IsAdd = Name.contains(".paddus"); 2103 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd); 2104 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { 2105 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 2106 CI->getArgOperand(1), 2107 CI->getArgOperand(2), 2108 CI->getArgOperand(3), 2109 CI->getArgOperand(4), 2110 false); 2111 } else if (IsX86 && Name.startswith("avx512.mask.valign.")) { 2112 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 2113 CI->getArgOperand(1), 2114 CI->getArgOperand(2), 2115 CI->getArgOperand(3), 2116 CI->getArgOperand(4), 2117 true); 2118 } else if (IsX86 && (Name == "sse2.psll.dq" || 2119 Name == "avx2.psll.dq")) { 2120 // 128/256-bit shift left specified in bits. 2121 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2122 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), 2123 Shift / 8); // Shift is in bits. 2124 } else if (IsX86 && (Name == "sse2.psrl.dq" || 2125 Name == "avx2.psrl.dq")) { 2126 // 128/256-bit shift right specified in bits. 2127 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2128 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), 2129 Shift / 8); // Shift is in bits. 2130 } else if (IsX86 && (Name == "sse2.psll.dq.bs" || 2131 Name == "avx2.psll.dq.bs" || 2132 Name == "avx512.psll.dq.512")) { 2133 // 128/256/512-bit shift left specified in bytes. 2134 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2135 Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 2136 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" || 2137 Name == "avx2.psrl.dq.bs" || 2138 Name == "avx512.psrl.dq.512")) { 2139 // 128/256/512-bit shift right specified in bytes. 2140 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2141 Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 2142 } else if (IsX86 && (Name == "sse41.pblendw" || 2143 Name.startswith("sse41.blendp") || 2144 Name.startswith("avx.blend.p") || 2145 Name == "avx2.pblendw" || 2146 Name.startswith("avx2.pblendd."))) { 2147 Value *Op0 = CI->getArgOperand(0); 2148 Value *Op1 = CI->getArgOperand(1); 2149 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2150 VectorType *VecTy = cast<VectorType>(CI->getType()); 2151 unsigned NumElts = VecTy->getNumElements(); 2152 2153 SmallVector<uint32_t, 16> Idxs(NumElts); 2154 for (unsigned i = 0; i != NumElts; ++i) 2155 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; 2156 2157 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2158 } else if (IsX86 && (Name.startswith("avx.vinsertf128.") || 2159 Name == "avx2.vinserti128" || 2160 Name.startswith("avx512.mask.insert"))) { 2161 Value *Op0 = CI->getArgOperand(0); 2162 Value *Op1 = CI->getArgOperand(1); 2163 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2164 unsigned DstNumElts = CI->getType()->getVectorNumElements(); 2165 unsigned SrcNumElts = Op1->getType()->getVectorNumElements(); 2166 unsigned Scale = DstNumElts / SrcNumElts; 2167 2168 // Mask off the high bits of the immediate value; hardware ignores those. 2169 Imm = Imm % Scale; 2170 2171 // Extend the second operand into a vector the size of the destination. 2172 Value *UndefV = UndefValue::get(Op1->getType()); 2173 SmallVector<uint32_t, 8> Idxs(DstNumElts); 2174 for (unsigned i = 0; i != SrcNumElts; ++i) 2175 Idxs[i] = i; 2176 for (unsigned i = SrcNumElts; i != DstNumElts; ++i) 2177 Idxs[i] = SrcNumElts; 2178 Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs); 2179 2180 // Insert the second operand into the first operand. 2181 2182 // Note that there is no guarantee that instruction lowering will actually 2183 // produce a vinsertf128 instruction for the created shuffles. In 2184 // particular, the 0 immediate case involves no lane changes, so it can 2185 // be handled as a blend. 2186 2187 // Example of shuffle mask for 32-bit elements: 2188 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 2189 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 > 2190 2191 // First fill with identify mask. 2192 for (unsigned i = 0; i != DstNumElts; ++i) 2193 Idxs[i] = i; 2194 // Then replace the elements where we need to insert. 2195 for (unsigned i = 0; i != SrcNumElts; ++i) 2196 Idxs[i + Imm * SrcNumElts] = i + DstNumElts; 2197 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs); 2198 2199 // If the intrinsic has a mask operand, handle that. 2200 if (CI->getNumArgOperands() == 5) 2201 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2202 CI->getArgOperand(3)); 2203 } else if (IsX86 && (Name.startswith("avx.vextractf128.") || 2204 Name == "avx2.vextracti128" || 2205 Name.startswith("avx512.mask.vextract"))) { 2206 Value *Op0 = CI->getArgOperand(0); 2207 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2208 unsigned DstNumElts = CI->getType()->getVectorNumElements(); 2209 unsigned SrcNumElts = Op0->getType()->getVectorNumElements(); 2210 unsigned Scale = SrcNumElts / DstNumElts; 2211 2212 // Mask off the high bits of the immediate value; hardware ignores those. 2213 Imm = Imm % Scale; 2214 2215 // Get indexes for the subvector of the input vector. 2216 SmallVector<uint32_t, 8> Idxs(DstNumElts); 2217 for (unsigned i = 0; i != DstNumElts; ++i) { 2218 Idxs[i] = i + (Imm * DstNumElts); 2219 } 2220 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2221 2222 // If the intrinsic has a mask operand, handle that. 2223 if (CI->getNumArgOperands() == 4) 2224 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2225 CI->getArgOperand(2)); 2226 } else if (!IsX86 && Name == "stackprotectorcheck") { 2227 Rep = nullptr; 2228 } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") || 2229 Name.startswith("avx512.mask.perm.di."))) { 2230 Value *Op0 = CI->getArgOperand(0); 2231 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2232 VectorType *VecTy = cast<VectorType>(CI->getType()); 2233 unsigned NumElts = VecTy->getNumElements(); 2234 2235 SmallVector<uint32_t, 8> Idxs(NumElts); 2236 for (unsigned i = 0; i != NumElts; ++i) 2237 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); 2238 2239 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2240 2241 if (CI->getNumArgOperands() == 4) 2242 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2243 CI->getArgOperand(2)); 2244 } else if (IsX86 && (Name.startswith("avx.vperm2f128.") || 2245 Name == "avx2.vperm2i128")) { 2246 // The immediate permute control byte looks like this: 2247 // [1:0] - select 128 bits from sources for low half of destination 2248 // [2] - ignore 2249 // [3] - zero low half of destination 2250 // [5:4] - select 128 bits from sources for high half of destination 2251 // [6] - ignore 2252 // [7] - zero high half of destination 2253 2254 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2255 2256 unsigned NumElts = CI->getType()->getVectorNumElements(); 2257 unsigned HalfSize = NumElts / 2; 2258 SmallVector<uint32_t, 8> ShuffleMask(NumElts); 2259 2260 // Determine which operand(s) are actually in use for this instruction. 2261 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0); 2262 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0); 2263 2264 // If needed, replace operands based on zero mask. 2265 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0; 2266 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1; 2267 2268 // Permute low half of result. 2269 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; 2270 for (unsigned i = 0; i < HalfSize; ++i) 2271 ShuffleMask[i] = StartIndex + i; 2272 2273 // Permute high half of result. 2274 StartIndex = (Imm & 0x10) ? HalfSize : 0; 2275 for (unsigned i = 0; i < HalfSize; ++i) 2276 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; 2277 2278 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 2279 2280 } else if (IsX86 && (Name.startswith("avx.vpermil.") || 2281 Name == "sse2.pshuf.d" || 2282 Name.startswith("avx512.mask.vpermil.p") || 2283 Name.startswith("avx512.mask.pshuf.d."))) { 2284 Value *Op0 = CI->getArgOperand(0); 2285 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2286 VectorType *VecTy = cast<VectorType>(CI->getType()); 2287 unsigned NumElts = VecTy->getNumElements(); 2288 // Calculate the size of each index in the immediate. 2289 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 2290 unsigned IdxMask = ((1 << IdxSize) - 1); 2291 2292 SmallVector<uint32_t, 8> Idxs(NumElts); 2293 // Lookup the bits for this element, wrapping around the immediate every 2294 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need 2295 // to offset by the first index of each group. 2296 for (unsigned i = 0; i != NumElts; ++i) 2297 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); 2298 2299 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2300 2301 if (CI->getNumArgOperands() == 4) 2302 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2303 CI->getArgOperand(2)); 2304 } else if (IsX86 && (Name == "sse2.pshufl.w" || 2305 Name.startswith("avx512.mask.pshufl.w."))) { 2306 Value *Op0 = CI->getArgOperand(0); 2307 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2308 unsigned NumElts = CI->getType()->getVectorNumElements(); 2309 2310 SmallVector<uint32_t, 16> Idxs(NumElts); 2311 for (unsigned l = 0; l != NumElts; l += 8) { 2312 for (unsigned i = 0; i != 4; ++i) 2313 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; 2314 for (unsigned i = 4; i != 8; ++i) 2315 Idxs[i + l] = i + l; 2316 } 2317 2318 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2319 2320 if (CI->getNumArgOperands() == 4) 2321 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2322 CI->getArgOperand(2)); 2323 } else if (IsX86 && (Name == "sse2.pshufh.w" || 2324 Name.startswith("avx512.mask.pshufh.w."))) { 2325 Value *Op0 = CI->getArgOperand(0); 2326 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 2327 unsigned NumElts = CI->getType()->getVectorNumElements(); 2328 2329 SmallVector<uint32_t, 16> Idxs(NumElts); 2330 for (unsigned l = 0; l != NumElts; l += 8) { 2331 for (unsigned i = 0; i != 4; ++i) 2332 Idxs[i + l] = i + l; 2333 for (unsigned i = 0; i != 4; ++i) 2334 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; 2335 } 2336 2337 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2338 2339 if (CI->getNumArgOperands() == 4) 2340 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2341 CI->getArgOperand(2)); 2342 } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) { 2343 Value *Op0 = CI->getArgOperand(0); 2344 Value *Op1 = CI->getArgOperand(1); 2345 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2346 unsigned NumElts = CI->getType()->getVectorNumElements(); 2347 2348 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2349 unsigned HalfLaneElts = NumLaneElts / 2; 2350 2351 SmallVector<uint32_t, 16> Idxs(NumElts); 2352 for (unsigned i = 0; i != NumElts; ++i) { 2353 // Base index is the starting element of the lane. 2354 Idxs[i] = i - (i % NumLaneElts); 2355 // If we are half way through the lane switch to the other source. 2356 if ((i % NumLaneElts) >= HalfLaneElts) 2357 Idxs[i] += NumElts; 2358 // Now select the specific element. By adding HalfLaneElts bits from 2359 // the immediate. Wrapping around the immediate every 8-bits. 2360 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); 2361 } 2362 2363 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2364 2365 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, 2366 CI->getArgOperand(3)); 2367 } else if (IsX86 && (Name.startswith("avx512.mask.movddup") || 2368 Name.startswith("avx512.mask.movshdup") || 2369 Name.startswith("avx512.mask.movsldup"))) { 2370 Value *Op0 = CI->getArgOperand(0); 2371 unsigned NumElts = CI->getType()->getVectorNumElements(); 2372 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2373 2374 unsigned Offset = 0; 2375 if (Name.startswith("avx512.mask.movshdup.")) 2376 Offset = 1; 2377 2378 SmallVector<uint32_t, 16> Idxs(NumElts); 2379 for (unsigned l = 0; l != NumElts; l += NumLaneElts) 2380 for (unsigned i = 0; i != NumLaneElts; i += 2) { 2381 Idxs[i + l + 0] = i + l + Offset; 2382 Idxs[i + l + 1] = i + l + Offset; 2383 } 2384 2385 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 2386 2387 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2388 CI->getArgOperand(1)); 2389 } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") || 2390 Name.startswith("avx512.mask.unpckl."))) { 2391 Value *Op0 = CI->getArgOperand(0); 2392 Value *Op1 = CI->getArgOperand(1); 2393 int NumElts = CI->getType()->getVectorNumElements(); 2394 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2395 2396 SmallVector<uint32_t, 64> Idxs(NumElts); 2397 for (int l = 0; l != NumElts; l += NumLaneElts) 2398 for (int i = 0; i != NumLaneElts; ++i) 2399 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 2400 2401 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2402 2403 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2404 CI->getArgOperand(2)); 2405 } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") || 2406 Name.startswith("avx512.mask.unpckh."))) { 2407 Value *Op0 = CI->getArgOperand(0); 2408 Value *Op1 = CI->getArgOperand(1); 2409 int NumElts = CI->getType()->getVectorNumElements(); 2410 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); 2411 2412 SmallVector<uint32_t, 64> Idxs(NumElts); 2413 for (int l = 0; l != NumElts; l += NumLaneElts) 2414 for (int i = 0; i != NumLaneElts; ++i) 2415 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 2416 2417 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2418 2419 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2420 CI->getArgOperand(2)); 2421 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { 2422 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); 2423 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2424 CI->getArgOperand(2)); 2425 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) { 2426 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)), 2427 CI->getArgOperand(1)); 2428 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2429 CI->getArgOperand(2)); 2430 } else if (IsX86 && Name.startswith("avx512.mask.por.")) { 2431 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1)); 2432 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2433 CI->getArgOperand(2)); 2434 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) { 2435 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1)); 2436 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2437 CI->getArgOperand(2)); 2438 } else if (IsX86 && Name.startswith("avx512.mask.and.")) { 2439 VectorType *FTy = cast<VectorType>(CI->getType()); 2440 VectorType *ITy = VectorType::getInteger(FTy); 2441 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2442 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2443 Rep = Builder.CreateBitCast(Rep, FTy); 2444 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2445 CI->getArgOperand(2)); 2446 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { 2447 VectorType *FTy = cast<VectorType>(CI->getType()); 2448 VectorType *ITy = VectorType::getInteger(FTy); 2449 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 2450 Rep = Builder.CreateAnd(Rep, 2451 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2452 Rep = Builder.CreateBitCast(Rep, FTy); 2453 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2454 CI->getArgOperand(2)); 2455 } else if (IsX86 && Name.startswith("avx512.mask.or.")) { 2456 VectorType *FTy = cast<VectorType>(CI->getType()); 2457 VectorType *ITy = VectorType::getInteger(FTy); 2458 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2459 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2460 Rep = Builder.CreateBitCast(Rep, FTy); 2461 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2462 CI->getArgOperand(2)); 2463 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { 2464 VectorType *FTy = cast<VectorType>(CI->getType()); 2465 VectorType *ITy = VectorType::getInteger(FTy); 2466 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2467 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2468 Rep = Builder.CreateBitCast(Rep, FTy); 2469 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2470 CI->getArgOperand(2)); 2471 } else if (IsX86 && Name.startswith("avx512.mask.padd.")) { 2472 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 2473 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2474 CI->getArgOperand(2)); 2475 } else if (IsX86 && Name.startswith("avx512.mask.psub.")) { 2476 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); 2477 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2478 CI->getArgOperand(2)); 2479 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { 2480 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2481 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2482 CI->getArgOperand(2)); 2483 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) { 2484 if (Name.endswith(".512")) { 2485 Intrinsic::ID IID; 2486 if (Name[17] == 's') 2487 IID = Intrinsic::x86_avx512_add_ps_512; 2488 else 2489 IID = Intrinsic::x86_avx512_add_pd_512; 2490 2491 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2492 { CI->getArgOperand(0), CI->getArgOperand(1), 2493 CI->getArgOperand(4) }); 2494 } else { 2495 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 2496 } 2497 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2498 CI->getArgOperand(2)); 2499 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { 2500 if (Name.endswith(".512")) { 2501 Intrinsic::ID IID; 2502 if (Name[17] == 's') 2503 IID = Intrinsic::x86_avx512_div_ps_512; 2504 else 2505 IID = Intrinsic::x86_avx512_div_pd_512; 2506 2507 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2508 { CI->getArgOperand(0), CI->getArgOperand(1), 2509 CI->getArgOperand(4) }); 2510 } else { 2511 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 2512 } 2513 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2514 CI->getArgOperand(2)); 2515 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { 2516 if (Name.endswith(".512")) { 2517 Intrinsic::ID IID; 2518 if (Name[17] == 's') 2519 IID = Intrinsic::x86_avx512_mul_ps_512; 2520 else 2521 IID = Intrinsic::x86_avx512_mul_pd_512; 2522 2523 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2524 { CI->getArgOperand(0), CI->getArgOperand(1), 2525 CI->getArgOperand(4) }); 2526 } else { 2527 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2528 } 2529 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2530 CI->getArgOperand(2)); 2531 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { 2532 if (Name.endswith(".512")) { 2533 Intrinsic::ID IID; 2534 if (Name[17] == 's') 2535 IID = Intrinsic::x86_avx512_sub_ps_512; 2536 else 2537 IID = Intrinsic::x86_avx512_sub_pd_512; 2538 2539 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2540 { CI->getArgOperand(0), CI->getArgOperand(1), 2541 CI->getArgOperand(4) }); 2542 } else { 2543 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 2544 } 2545 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2546 CI->getArgOperand(2)); 2547 } else if (IsX86 && Name.startswith("avx512.mask.max.p") && 2548 Name.drop_front(18) == ".512") { 2549 Intrinsic::ID IID; 2550 if (Name[17] == 's') 2551 IID = Intrinsic::x86_avx512_max_ps_512; 2552 else 2553 IID = Intrinsic::x86_avx512_max_pd_512; 2554 2555 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2556 { CI->getArgOperand(0), CI->getArgOperand(1), 2557 CI->getArgOperand(4) }); 2558 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2559 CI->getArgOperand(2)); 2560 } else if (IsX86 && Name.startswith("avx512.mask.min.p") && 2561 Name.drop_front(18) == ".512") { 2562 Intrinsic::ID IID; 2563 if (Name[17] == 's') 2564 IID = Intrinsic::x86_avx512_min_ps_512; 2565 else 2566 IID = Intrinsic::x86_avx512_min_pd_512; 2567 2568 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2569 { CI->getArgOperand(0), CI->getArgOperand(1), 2570 CI->getArgOperand(4) }); 2571 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2572 CI->getArgOperand(2)); 2573 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { 2574 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 2575 Intrinsic::ctlz, 2576 CI->getType()), 2577 { CI->getArgOperand(0), Builder.getInt1(false) }); 2578 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2579 CI->getArgOperand(1)); 2580 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { 2581 bool IsImmediate = Name[16] == 'i' || 2582 (Name.size() > 18 && Name[18] == 'i'); 2583 bool IsVariable = Name[16] == 'v'; 2584 char Size = Name[16] == '.' ? Name[17] : 2585 Name[17] == '.' ? Name[18] : 2586 Name[18] == '.' ? Name[19] : 2587 Name[20]; 2588 2589 Intrinsic::ID IID; 2590 if (IsVariable && Name[17] != '.') { 2591 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di 2592 IID = Intrinsic::x86_avx2_psllv_q; 2593 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di 2594 IID = Intrinsic::x86_avx2_psllv_q_256; 2595 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si 2596 IID = Intrinsic::x86_avx2_psllv_d; 2597 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si 2598 IID = Intrinsic::x86_avx2_psllv_d_256; 2599 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi 2600 IID = Intrinsic::x86_avx512_psllv_w_128; 2601 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi 2602 IID = Intrinsic::x86_avx512_psllv_w_256; 2603 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi 2604 IID = Intrinsic::x86_avx512_psllv_w_512; 2605 else 2606 llvm_unreachable("Unexpected size"); 2607 } else if (Name.endswith(".128")) { 2608 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 2609 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d 2610 : Intrinsic::x86_sse2_psll_d; 2611 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 2612 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q 2613 : Intrinsic::x86_sse2_psll_q; 2614 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 2615 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w 2616 : Intrinsic::x86_sse2_psll_w; 2617 else 2618 llvm_unreachable("Unexpected size"); 2619 } else if (Name.endswith(".256")) { 2620 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 2621 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d 2622 : Intrinsic::x86_avx2_psll_d; 2623 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 2624 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q 2625 : Intrinsic::x86_avx2_psll_q; 2626 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 2627 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w 2628 : Intrinsic::x86_avx2_psll_w; 2629 else 2630 llvm_unreachable("Unexpected size"); 2631 } else { 2632 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 2633 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 : 2634 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 : 2635 Intrinsic::x86_avx512_psll_d_512; 2636 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 2637 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 : 2638 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 : 2639 Intrinsic::x86_avx512_psll_q_512; 2640 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w 2641 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 2642 : Intrinsic::x86_avx512_psll_w_512; 2643 else 2644 llvm_unreachable("Unexpected size"); 2645 } 2646 2647 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2648 } else if (IsX86 && Name.startswith("avx512.mask.psrl")) { 2649 bool IsImmediate = Name[16] == 'i' || 2650 (Name.size() > 18 && Name[18] == 'i'); 2651 bool IsVariable = Name[16] == 'v'; 2652 char Size = Name[16] == '.' ? Name[17] : 2653 Name[17] == '.' ? Name[18] : 2654 Name[18] == '.' ? Name[19] : 2655 Name[20]; 2656 2657 Intrinsic::ID IID; 2658 if (IsVariable && Name[17] != '.') { 2659 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di 2660 IID = Intrinsic::x86_avx2_psrlv_q; 2661 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di 2662 IID = Intrinsic::x86_avx2_psrlv_q_256; 2663 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si 2664 IID = Intrinsic::x86_avx2_psrlv_d; 2665 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si 2666 IID = Intrinsic::x86_avx2_psrlv_d_256; 2667 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi 2668 IID = Intrinsic::x86_avx512_psrlv_w_128; 2669 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi 2670 IID = Intrinsic::x86_avx512_psrlv_w_256; 2671 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi 2672 IID = Intrinsic::x86_avx512_psrlv_w_512; 2673 else 2674 llvm_unreachable("Unexpected size"); 2675 } else if (Name.endswith(".128")) { 2676 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 2677 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d 2678 : Intrinsic::x86_sse2_psrl_d; 2679 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 2680 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q 2681 : Intrinsic::x86_sse2_psrl_q; 2682 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 2683 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w 2684 : Intrinsic::x86_sse2_psrl_w; 2685 else 2686 llvm_unreachable("Unexpected size"); 2687 } else if (Name.endswith(".256")) { 2688 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 2689 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d 2690 : Intrinsic::x86_avx2_psrl_d; 2691 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 2692 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q 2693 : Intrinsic::x86_avx2_psrl_q; 2694 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 2695 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w 2696 : Intrinsic::x86_avx2_psrl_w; 2697 else 2698 llvm_unreachable("Unexpected size"); 2699 } else { 2700 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 2701 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 : 2702 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 : 2703 Intrinsic::x86_avx512_psrl_d_512; 2704 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512 2705 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 : 2706 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 : 2707 Intrinsic::x86_avx512_psrl_q_512; 2708 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w) 2709 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512 2710 : Intrinsic::x86_avx512_psrl_w_512; 2711 else 2712 llvm_unreachable("Unexpected size"); 2713 } 2714 2715 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2716 } else if (IsX86 && Name.startswith("avx512.mask.psra")) { 2717 bool IsImmediate = Name[16] == 'i' || 2718 (Name.size() > 18 && Name[18] == 'i'); 2719 bool IsVariable = Name[16] == 'v'; 2720 char Size = Name[16] == '.' ? Name[17] : 2721 Name[17] == '.' ? Name[18] : 2722 Name[18] == '.' ? Name[19] : 2723 Name[20]; 2724 2725 Intrinsic::ID IID; 2726 if (IsVariable && Name[17] != '.') { 2727 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si 2728 IID = Intrinsic::x86_avx2_psrav_d; 2729 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si 2730 IID = Intrinsic::x86_avx2_psrav_d_256; 2731 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi 2732 IID = Intrinsic::x86_avx512_psrav_w_128; 2733 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi 2734 IID = Intrinsic::x86_avx512_psrav_w_256; 2735 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi 2736 IID = Intrinsic::x86_avx512_psrav_w_512; 2737 else 2738 llvm_unreachable("Unexpected size"); 2739 } else if (Name.endswith(".128")) { 2740 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128 2741 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d 2742 : Intrinsic::x86_sse2_psra_d; 2743 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128 2744 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 : 2745 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 : 2746 Intrinsic::x86_avx512_psra_q_128; 2747 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128 2748 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w 2749 : Intrinsic::x86_sse2_psra_w; 2750 else 2751 llvm_unreachable("Unexpected size"); 2752 } else if (Name.endswith(".256")) { 2753 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256 2754 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d 2755 : Intrinsic::x86_avx2_psra_d; 2756 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256 2757 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 : 2758 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 : 2759 Intrinsic::x86_avx512_psra_q_256; 2760 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256 2761 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w 2762 : Intrinsic::x86_avx2_psra_w; 2763 else 2764 llvm_unreachable("Unexpected size"); 2765 } else { 2766 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512 2767 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 : 2768 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 : 2769 Intrinsic::x86_avx512_psra_d_512; 2770 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q 2771 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 : 2772 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 : 2773 Intrinsic::x86_avx512_psra_q_512; 2774 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w 2775 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512 2776 : Intrinsic::x86_avx512_psra_w_512; 2777 else 2778 llvm_unreachable("Unexpected size"); 2779 } 2780 2781 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 2782 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { 2783 Rep = upgradeMaskedMove(Builder, *CI); 2784 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { 2785 Rep = UpgradeMaskToInt(Builder, *CI); 2786 } else if (IsX86 && Name.endswith(".movntdqa")) { 2787 Module *M = F->getParent(); 2788 MDNode *Node = MDNode::get( 2789 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 2790 2791 Value *Ptr = CI->getArgOperand(0); 2792 VectorType *VTy = cast<VectorType>(CI->getType()); 2793 2794 // Convert the type of the pointer to a pointer to the stored type. 2795 Value *BC = 2796 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); 2797 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8); 2798 LI->setMetadata(M->getMDKindID("nontemporal"), Node); 2799 Rep = LI; 2800 } else if (IsX86 && 2801 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") || 2802 Name.startswith("avx512.mask.pavg"))) { 2803 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w, 2804 // llvm.x86.avx512.mask.pavg.b/w 2805 Value *A = CI->getArgOperand(0); 2806 Value *B = CI->getArgOperand(1); 2807 VectorType *ZextType = VectorType::getExtendedElementVectorType( 2808 cast<VectorType>(A->getType())); 2809 Value *ExtendedA = Builder.CreateZExt(A, ZextType); 2810 Value *ExtendedB = Builder.CreateZExt(B, ZextType); 2811 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB); 2812 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1)); 2813 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1)); 2814 Rep = Builder.CreateTrunc(ShiftR, A->getType()); 2815 if (CI->getNumArgOperands() > 2) { 2816 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2817 CI->getArgOperand(2)); 2818 } 2819 } else if (IsX86 && (Name.startswith("fma.vfmadd.") || 2820 Name.startswith("fma.vfmsub.") || 2821 Name.startswith("fma.vfnmadd.") || 2822 Name.startswith("fma.vfnmsub."))) { 2823 bool NegMul = Name[6] == 'n'; 2824 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; 2825 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; 2826 2827 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 2828 CI->getArgOperand(2) }; 2829 2830 if (IsScalar) { 2831 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 2832 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 2833 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 2834 } 2835 2836 if (NegMul && !IsScalar) 2837 Ops[0] = Builder.CreateFNeg(Ops[0]); 2838 if (NegMul && IsScalar) 2839 Ops[1] = Builder.CreateFNeg(Ops[1]); 2840 if (NegAcc) 2841 Ops[2] = Builder.CreateFNeg(Ops[2]); 2842 2843 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 2844 Intrinsic::fma, 2845 Ops[0]->getType()), 2846 Ops); 2847 2848 if (IsScalar) 2849 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, 2850 (uint64_t)0); 2851 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) { 2852 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 2853 CI->getArgOperand(2) }; 2854 2855 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 2856 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 2857 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 2858 2859 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), 2860 Intrinsic::fma, 2861 Ops[0]->getType()), 2862 Ops); 2863 2864 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), 2865 Rep, (uint64_t)0); 2866 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") || 2867 Name.startswith("avx512.maskz.vfmadd.s") || 2868 Name.startswith("avx512.mask3.vfmadd.s") || 2869 Name.startswith("avx512.mask3.vfmsub.s") || 2870 Name.startswith("avx512.mask3.vfnmsub.s"))) { 2871 bool IsMask3 = Name[11] == '3'; 2872 bool IsMaskZ = Name[11] == 'z'; 2873 // Drop the "avx512.mask." to make it easier. 2874 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 2875 bool NegMul = Name[2] == 'n'; 2876 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; 2877 2878 Value *A = CI->getArgOperand(0); 2879 Value *B = CI->getArgOperand(1); 2880 Value *C = CI->getArgOperand(2); 2881 2882 if (NegMul && (IsMask3 || IsMaskZ)) 2883 A = Builder.CreateFNeg(A); 2884 if (NegMul && !(IsMask3 || IsMaskZ)) 2885 B = Builder.CreateFNeg(B); 2886 if (NegAcc) 2887 C = Builder.CreateFNeg(C); 2888 2889 A = Builder.CreateExtractElement(A, (uint64_t)0); 2890 B = Builder.CreateExtractElement(B, (uint64_t)0); 2891 C = Builder.CreateExtractElement(C, (uint64_t)0); 2892 2893 if (!isa<ConstantInt>(CI->getArgOperand(4)) || 2894 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) { 2895 Value *Ops[] = { A, B, C, CI->getArgOperand(4) }; 2896 2897 Intrinsic::ID IID; 2898 if (Name.back() == 'd') 2899 IID = Intrinsic::x86_avx512_vfmadd_f64; 2900 else 2901 IID = Intrinsic::x86_avx512_vfmadd_f32; 2902 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID); 2903 Rep = Builder.CreateCall(FMA, Ops); 2904 } else { 2905 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), 2906 Intrinsic::fma, 2907 A->getType()); 2908 Rep = Builder.CreateCall(FMA, { A, B, C }); 2909 } 2910 2911 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) : 2912 IsMask3 ? C : A; 2913 2914 // For Mask3 with NegAcc, we need to create a new extractelement that 2915 // avoids the negation above. 2916 if (NegAcc && IsMask3) 2917 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2), 2918 (uint64_t)0); 2919 2920 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3), 2921 Rep, PassThru); 2922 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), 2923 Rep, (uint64_t)0); 2924 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") || 2925 Name.startswith("avx512.mask.vfnmadd.p") || 2926 Name.startswith("avx512.mask.vfnmsub.p") || 2927 Name.startswith("avx512.mask3.vfmadd.p") || 2928 Name.startswith("avx512.mask3.vfmsub.p") || 2929 Name.startswith("avx512.mask3.vfnmsub.p") || 2930 Name.startswith("avx512.maskz.vfmadd.p"))) { 2931 bool IsMask3 = Name[11] == '3'; 2932 bool IsMaskZ = Name[11] == 'z'; 2933 // Drop the "avx512.mask." to make it easier. 2934 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 2935 bool NegMul = Name[2] == 'n'; 2936 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; 2937 2938 Value *A = CI->getArgOperand(0); 2939 Value *B = CI->getArgOperand(1); 2940 Value *C = CI->getArgOperand(2); 2941 2942 if (NegMul && (IsMask3 || IsMaskZ)) 2943 A = Builder.CreateFNeg(A); 2944 if (NegMul && !(IsMask3 || IsMaskZ)) 2945 B = Builder.CreateFNeg(B); 2946 if (NegAcc) 2947 C = Builder.CreateFNeg(C); 2948 2949 if (CI->getNumArgOperands() == 5 && 2950 (!isa<ConstantInt>(CI->getArgOperand(4)) || 2951 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { 2952 Intrinsic::ID IID; 2953 // Check the character before ".512" in string. 2954 if (Name[Name.size()-5] == 's') 2955 IID = Intrinsic::x86_avx512_vfmadd_ps_512; 2956 else 2957 IID = Intrinsic::x86_avx512_vfmadd_pd_512; 2958 2959 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 2960 { A, B, C, CI->getArgOperand(4) }); 2961 } else { 2962 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), 2963 Intrinsic::fma, 2964 A->getType()); 2965 Rep = Builder.CreateCall(FMA, { A, B, C }); 2966 } 2967 2968 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : 2969 IsMask3 ? CI->getArgOperand(2) : 2970 CI->getArgOperand(0); 2971 2972 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 2973 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") || 2974 Name.startswith("fma.vfmsubadd.p"))) { 2975 bool IsSubAdd = Name[7] == 's'; 2976 int NumElts = CI->getType()->getVectorNumElements(); 2977 2978 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 2979 CI->getArgOperand(2) }; 2980 2981 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, 2982 Ops[0]->getType()); 2983 Value *Odd = Builder.CreateCall(FMA, Ops); 2984 Ops[2] = Builder.CreateFNeg(Ops[2]); 2985 Value *Even = Builder.CreateCall(FMA, Ops); 2986 2987 if (IsSubAdd) 2988 std::swap(Even, Odd); 2989 2990 SmallVector<uint32_t, 32> Idxs(NumElts); 2991 for (int i = 0; i != NumElts; ++i) 2992 Idxs[i] = i + (i % 2) * NumElts; 2993 2994 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); 2995 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") || 2996 Name.startswith("avx512.mask3.vfmaddsub.p") || 2997 Name.startswith("avx512.maskz.vfmaddsub.p") || 2998 Name.startswith("avx512.mask3.vfmsubadd.p"))) { 2999 bool IsMask3 = Name[11] == '3'; 3000 bool IsMaskZ = Name[11] == 'z'; 3001 // Drop the "avx512.mask." to make it easier. 3002 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3003 bool IsSubAdd = Name[3] == 's'; 3004 if (CI->getNumArgOperands() == 5 && 3005 (!isa<ConstantInt>(CI->getArgOperand(4)) || 3006 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { 3007 Intrinsic::ID IID; 3008 // Check the character before ".512" in string. 3009 if (Name[Name.size()-5] == 's') 3010 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; 3011 else 3012 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; 3013 3014 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3015 CI->getArgOperand(2), CI->getArgOperand(4) }; 3016 if (IsSubAdd) 3017 Ops[2] = Builder.CreateFNeg(Ops[2]); 3018 3019 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), 3020 {CI->getArgOperand(0), CI->getArgOperand(1), 3021 CI->getArgOperand(2), CI->getArgOperand(4)}); 3022 } else { 3023 int NumElts = CI->getType()->getVectorNumElements(); 3024 3025 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3026 CI->getArgOperand(2) }; 3027 3028 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, 3029 Ops[0]->getType()); 3030 Value *Odd = Builder.CreateCall(FMA, Ops); 3031 Ops[2] = Builder.CreateFNeg(Ops[2]); 3032 Value *Even = Builder.CreateCall(FMA, Ops); 3033 3034 if (IsSubAdd) 3035 std::swap(Even, Odd); 3036 3037 SmallVector<uint32_t, 32> Idxs(NumElts); 3038 for (int i = 0; i != NumElts; ++i) 3039 Idxs[i] = i + (i % 2) * NumElts; 3040 3041 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); 3042 } 3043 3044 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : 3045 IsMask3 ? CI->getArgOperand(2) : 3046 CI->getArgOperand(0); 3047 3048 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3049 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") || 3050 Name.startswith("avx512.maskz.pternlog."))) { 3051 bool ZeroMask = Name[11] == 'z'; 3052 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3053 unsigned EltWidth = CI->getType()->getScalarSizeInBits(); 3054 Intrinsic::ID IID; 3055 if (VecWidth == 128 && EltWidth == 32) 3056 IID = Intrinsic::x86_avx512_pternlog_d_128; 3057 else if (VecWidth == 256 && EltWidth == 32) 3058 IID = Intrinsic::x86_avx512_pternlog_d_256; 3059 else if (VecWidth == 512 && EltWidth == 32) 3060 IID = Intrinsic::x86_avx512_pternlog_d_512; 3061 else if (VecWidth == 128 && EltWidth == 64) 3062 IID = Intrinsic::x86_avx512_pternlog_q_128; 3063 else if (VecWidth == 256 && EltWidth == 64) 3064 IID = Intrinsic::x86_avx512_pternlog_q_256; 3065 else if (VecWidth == 512 && EltWidth == 64) 3066 IID = Intrinsic::x86_avx512_pternlog_q_512; 3067 else 3068 llvm_unreachable("Unexpected intrinsic"); 3069 3070 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), 3071 CI->getArgOperand(2), CI->getArgOperand(3) }; 3072 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3073 Args); 3074 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3075 : CI->getArgOperand(0); 3076 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); 3077 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") || 3078 Name.startswith("avx512.maskz.vpmadd52"))) { 3079 bool ZeroMask = Name[11] == 'z'; 3080 bool High = Name[20] == 'h' || Name[21] == 'h'; 3081 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3082 Intrinsic::ID IID; 3083 if (VecWidth == 128 && !High) 3084 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; 3085 else if (VecWidth == 256 && !High) 3086 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; 3087 else if (VecWidth == 512 && !High) 3088 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; 3089 else if (VecWidth == 128 && High) 3090 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; 3091 else if (VecWidth == 256 && High) 3092 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; 3093 else if (VecWidth == 512 && High) 3094 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; 3095 else 3096 llvm_unreachable("Unexpected intrinsic"); 3097 3098 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), 3099 CI->getArgOperand(2) }; 3100 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3101 Args); 3102 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3103 : CI->getArgOperand(0); 3104 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3105 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") || 3106 Name.startswith("avx512.mask.vpermt2var.") || 3107 Name.startswith("avx512.maskz.vpermt2var."))) { 3108 bool ZeroMask = Name[11] == 'z'; 3109 bool IndexForm = Name[17] == 'i'; 3110 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3111 unsigned EltWidth = CI->getType()->getScalarSizeInBits(); 3112 bool IsFloat = CI->getType()->isFPOrFPVectorTy(); 3113 Intrinsic::ID IID; 3114 if (VecWidth == 128 && EltWidth == 32 && IsFloat) 3115 IID = Intrinsic::x86_avx512_vpermi2var_ps_128; 3116 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) 3117 IID = Intrinsic::x86_avx512_vpermi2var_d_128; 3118 else if (VecWidth == 128 && EltWidth == 64 && IsFloat) 3119 IID = Intrinsic::x86_avx512_vpermi2var_pd_128; 3120 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) 3121 IID = Intrinsic::x86_avx512_vpermi2var_q_128; 3122 else if (VecWidth == 256 && EltWidth == 32 && IsFloat) 3123 IID = Intrinsic::x86_avx512_vpermi2var_ps_256; 3124 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) 3125 IID = Intrinsic::x86_avx512_vpermi2var_d_256; 3126 else if (VecWidth == 256 && EltWidth == 64 && IsFloat) 3127 IID = Intrinsic::x86_avx512_vpermi2var_pd_256; 3128 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) 3129 IID = Intrinsic::x86_avx512_vpermi2var_q_256; 3130 else if (VecWidth == 512 && EltWidth == 32 && IsFloat) 3131 IID = Intrinsic::x86_avx512_vpermi2var_ps_512; 3132 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) 3133 IID = Intrinsic::x86_avx512_vpermi2var_d_512; 3134 else if (VecWidth == 512 && EltWidth == 64 && IsFloat) 3135 IID = Intrinsic::x86_avx512_vpermi2var_pd_512; 3136 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) 3137 IID = Intrinsic::x86_avx512_vpermi2var_q_512; 3138 else if (VecWidth == 128 && EltWidth == 16) 3139 IID = Intrinsic::x86_avx512_vpermi2var_hi_128; 3140 else if (VecWidth == 256 && EltWidth == 16) 3141 IID = Intrinsic::x86_avx512_vpermi2var_hi_256; 3142 else if (VecWidth == 512 && EltWidth == 16) 3143 IID = Intrinsic::x86_avx512_vpermi2var_hi_512; 3144 else if (VecWidth == 128 && EltWidth == 8) 3145 IID = Intrinsic::x86_avx512_vpermi2var_qi_128; 3146 else if (VecWidth == 256 && EltWidth == 8) 3147 IID = Intrinsic::x86_avx512_vpermi2var_qi_256; 3148 else if (VecWidth == 512 && EltWidth == 8) 3149 IID = Intrinsic::x86_avx512_vpermi2var_qi_512; 3150 else 3151 llvm_unreachable("Unexpected intrinsic"); 3152 3153 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), 3154 CI->getArgOperand(2) }; 3155 3156 // If this isn't index form we need to swap operand 0 and 1. 3157 if (!IndexForm) 3158 std::swap(Args[0], Args[1]); 3159 3160 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3161 Args); 3162 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3163 : Builder.CreateBitCast(CI->getArgOperand(1), 3164 CI->getType()); 3165 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3166 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") || 3167 Name.startswith("avx512.maskz.vpdpbusd.") || 3168 Name.startswith("avx512.mask.vpdpbusds.") || 3169 Name.startswith("avx512.maskz.vpdpbusds."))) { 3170 bool ZeroMask = Name[11] == 'z'; 3171 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; 3172 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3173 Intrinsic::ID IID; 3174 if (VecWidth == 128 && !IsSaturating) 3175 IID = Intrinsic::x86_avx512_vpdpbusd_128; 3176 else if (VecWidth == 256 && !IsSaturating) 3177 IID = Intrinsic::x86_avx512_vpdpbusd_256; 3178 else if (VecWidth == 512 && !IsSaturating) 3179 IID = Intrinsic::x86_avx512_vpdpbusd_512; 3180 else if (VecWidth == 128 && IsSaturating) 3181 IID = Intrinsic::x86_avx512_vpdpbusds_128; 3182 else if (VecWidth == 256 && IsSaturating) 3183 IID = Intrinsic::x86_avx512_vpdpbusds_256; 3184 else if (VecWidth == 512 && IsSaturating) 3185 IID = Intrinsic::x86_avx512_vpdpbusds_512; 3186 else 3187 llvm_unreachable("Unexpected intrinsic"); 3188 3189 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3190 CI->getArgOperand(2) }; 3191 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3192 Args); 3193 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3194 : CI->getArgOperand(0); 3195 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3196 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") || 3197 Name.startswith("avx512.maskz.vpdpwssd.") || 3198 Name.startswith("avx512.mask.vpdpwssds.") || 3199 Name.startswith("avx512.maskz.vpdpwssds."))) { 3200 bool ZeroMask = Name[11] == 'z'; 3201 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; 3202 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3203 Intrinsic::ID IID; 3204 if (VecWidth == 128 && !IsSaturating) 3205 IID = Intrinsic::x86_avx512_vpdpwssd_128; 3206 else if (VecWidth == 256 && !IsSaturating) 3207 IID = Intrinsic::x86_avx512_vpdpwssd_256; 3208 else if (VecWidth == 512 && !IsSaturating) 3209 IID = Intrinsic::x86_avx512_vpdpwssd_512; 3210 else if (VecWidth == 128 && IsSaturating) 3211 IID = Intrinsic::x86_avx512_vpdpwssds_128; 3212 else if (VecWidth == 256 && IsSaturating) 3213 IID = Intrinsic::x86_avx512_vpdpwssds_256; 3214 else if (VecWidth == 512 && IsSaturating) 3215 IID = Intrinsic::x86_avx512_vpdpwssds_512; 3216 else 3217 llvm_unreachable("Unexpected intrinsic"); 3218 3219 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3220 CI->getArgOperand(2) }; 3221 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), 3222 Args); 3223 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3224 : CI->getArgOperand(0); 3225 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3226 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" || 3227 Name == "addcarry.u32" || Name == "addcarry.u64" || 3228 Name == "subborrow.u32" || Name == "subborrow.u64")) { 3229 Intrinsic::ID IID; 3230 if (Name[0] == 'a' && Name.back() == '2') 3231 IID = Intrinsic::x86_addcarry_32; 3232 else if (Name[0] == 'a' && Name.back() == '4') 3233 IID = Intrinsic::x86_addcarry_64; 3234 else if (Name[0] == 's' && Name.back() == '2') 3235 IID = Intrinsic::x86_subborrow_32; 3236 else if (Name[0] == 's' && Name.back() == '4') 3237 IID = Intrinsic::x86_subborrow_64; 3238 else 3239 llvm_unreachable("Unexpected intrinsic"); 3240 3241 // Make a call with 3 operands. 3242 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), 3243 CI->getArgOperand(2)}; 3244 Value *NewCall = Builder.CreateCall( 3245 Intrinsic::getDeclaration(CI->getModule(), IID), 3246 Args); 3247 3248 // Extract the second result and store it. 3249 Value *Data = Builder.CreateExtractValue(NewCall, 1); 3250 // Cast the pointer to the right type. 3251 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), 3252 llvm::PointerType::getUnqual(Data->getType())); 3253 Builder.CreateAlignedStore(Data, Ptr, 1); 3254 // Replace the original call result with the first result of the new call. 3255 Value *CF = Builder.CreateExtractValue(NewCall, 0); 3256 3257 CI->replaceAllUsesWith(CF); 3258 Rep = nullptr; 3259 } else if (IsX86 && Name.startswith("avx512.mask.") && 3260 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { 3261 // Rep will be updated by the call in the condition. 3262 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { 3263 Value *Arg = CI->getArgOperand(0); 3264 Value *Neg = Builder.CreateNeg(Arg, "neg"); 3265 Value *Cmp = Builder.CreateICmpSGE( 3266 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); 3267 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); 3268 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || 3269 Name == "max.ui" || Name == "max.ull")) { 3270 Value *Arg0 = CI->getArgOperand(0); 3271 Value *Arg1 = CI->getArgOperand(1); 3272 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 3273 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond") 3274 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond"); 3275 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max"); 3276 } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" || 3277 Name == "min.ui" || Name == "min.ull")) { 3278 Value *Arg0 = CI->getArgOperand(0); 3279 Value *Arg1 = CI->getArgOperand(1); 3280 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 3281 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond") 3282 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond"); 3283 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min"); 3284 } else if (IsNVVM && Name == "clz.ll") { 3285 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64. 3286 Value *Arg = CI->getArgOperand(0); 3287 Value *Ctlz = Builder.CreateCall( 3288 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, 3289 {Arg->getType()}), 3290 {Arg, Builder.getFalse()}, "ctlz"); 3291 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc"); 3292 } else if (IsNVVM && Name == "popc.ll") { 3293 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an 3294 // i64. 3295 Value *Arg = CI->getArgOperand(0); 3296 Value *Popc = Builder.CreateCall( 3297 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 3298 {Arg->getType()}), 3299 Arg, "ctpop"); 3300 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc"); 3301 } else if (IsNVVM && Name == "h2f") { 3302 Rep = Builder.CreateCall(Intrinsic::getDeclaration( 3303 F->getParent(), Intrinsic::convert_from_fp16, 3304 {Builder.getFloatTy()}), 3305 CI->getArgOperand(0), "h2f"); 3306 } else { 3307 llvm_unreachable("Unknown function for CallInst upgrade."); 3308 } 3309 3310 if (Rep) 3311 CI->replaceAllUsesWith(Rep); 3312 CI->eraseFromParent(); 3313 return; 3314 } 3315 3316 const auto &DefaultCase = [&NewFn, &CI]() -> void { 3317 // Handle generic mangling change, but nothing else 3318 assert( 3319 (CI->getCalledFunction()->getName() != NewFn->getName()) && 3320 "Unknown function for CallInst upgrade and isn't just a name change"); 3321 CI->setCalledFunction(NewFn); 3322 }; 3323 CallInst *NewCall = nullptr; 3324 switch (NewFn->getIntrinsicID()) { 3325 default: { 3326 DefaultCase(); 3327 return; 3328 } 3329 3330 case Intrinsic::arm_neon_vld1: 3331 case Intrinsic::arm_neon_vld2: 3332 case Intrinsic::arm_neon_vld3: 3333 case Intrinsic::arm_neon_vld4: 3334 case Intrinsic::arm_neon_vld2lane: 3335 case Intrinsic::arm_neon_vld3lane: 3336 case Intrinsic::arm_neon_vld4lane: 3337 case Intrinsic::arm_neon_vst1: 3338 case Intrinsic::arm_neon_vst2: 3339 case Intrinsic::arm_neon_vst3: 3340 case Intrinsic::arm_neon_vst4: 3341 case Intrinsic::arm_neon_vst2lane: 3342 case Intrinsic::arm_neon_vst3lane: 3343 case Intrinsic::arm_neon_vst4lane: { 3344 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3345 CI->arg_operands().end()); 3346 NewCall = Builder.CreateCall(NewFn, Args); 3347 break; 3348 } 3349 3350 case Intrinsic::bitreverse: 3351 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3352 break; 3353 3354 case Intrinsic::ctlz: 3355 case Intrinsic::cttz: 3356 assert(CI->getNumArgOperands() == 1 && 3357 "Mismatch between function args and call args"); 3358 NewCall = 3359 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); 3360 break; 3361 3362 case Intrinsic::objectsize: { 3363 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 3364 ? Builder.getFalse() 3365 : CI->getArgOperand(2); 3366 NewCall = Builder.CreateCall( 3367 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize}); 3368 break; 3369 } 3370 3371 case Intrinsic::ctpop: 3372 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3373 break; 3374 3375 case Intrinsic::convert_from_fp16: 3376 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3377 break; 3378 3379 case Intrinsic::dbg_value: 3380 // Upgrade from the old version that had an extra offset argument. 3381 assert(CI->getNumArgOperands() == 4); 3382 // Drop nonzero offsets instead of attempting to upgrade them. 3383 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) 3384 if (Offset->isZeroValue()) { 3385 NewCall = Builder.CreateCall( 3386 NewFn, 3387 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); 3388 break; 3389 } 3390 CI->eraseFromParent(); 3391 return; 3392 3393 case Intrinsic::x86_xop_vfrcz_ss: 3394 case Intrinsic::x86_xop_vfrcz_sd: 3395 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)}); 3396 break; 3397 3398 case Intrinsic::x86_xop_vpermil2pd: 3399 case Intrinsic::x86_xop_vpermil2ps: 3400 case Intrinsic::x86_xop_vpermil2pd_256: 3401 case Intrinsic::x86_xop_vpermil2ps_256: { 3402 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3403 CI->arg_operands().end()); 3404 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType()); 3405 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy); 3406 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy); 3407 NewCall = Builder.CreateCall(NewFn, Args); 3408 break; 3409 } 3410 3411 case Intrinsic::x86_sse41_ptestc: 3412 case Intrinsic::x86_sse41_ptestz: 3413 case Intrinsic::x86_sse41_ptestnzc: { 3414 // The arguments for these intrinsics used to be v4f32, and changed 3415 // to v2i64. This is purely a nop, since those are bitwise intrinsics. 3416 // So, the only thing required is a bitcast for both arguments. 3417 // First, check the arguments have the old type. 3418 Value *Arg0 = CI->getArgOperand(0); 3419 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) 3420 return; 3421 3422 // Old intrinsic, add bitcasts 3423 Value *Arg1 = CI->getArgOperand(1); 3424 3425 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2); 3426 3427 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast"); 3428 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast"); 3429 3430 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); 3431 break; 3432 } 3433 3434 case Intrinsic::x86_rdtscp: { 3435 // This used to take 1 arguments. If we have no arguments, it is already 3436 // upgraded. 3437 if (CI->getNumOperands() == 0) 3438 return; 3439 3440 NewCall = Builder.CreateCall(NewFn); 3441 // Extract the second result and store it. 3442 Value *Data = Builder.CreateExtractValue(NewCall, 1); 3443 // Cast the pointer to the right type. 3444 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0), 3445 llvm::PointerType::getUnqual(Data->getType())); 3446 Builder.CreateAlignedStore(Data, Ptr, 1); 3447 // Replace the original call result with the first result of the new call. 3448 Value *TSC = Builder.CreateExtractValue(NewCall, 0); 3449 3450 std::string Name = CI->getName(); 3451 if (!Name.empty()) { 3452 CI->setName(Name + ".old"); 3453 NewCall->setName(Name); 3454 } 3455 CI->replaceAllUsesWith(TSC); 3456 CI->eraseFromParent(); 3457 return; 3458 } 3459 3460 case Intrinsic::x86_sse41_insertps: 3461 case Intrinsic::x86_sse41_dppd: 3462 case Intrinsic::x86_sse41_dpps: 3463 case Intrinsic::x86_sse41_mpsadbw: 3464 case Intrinsic::x86_avx_dp_ps_256: 3465 case Intrinsic::x86_avx2_mpsadbw: { 3466 // Need to truncate the last argument from i32 to i8 -- this argument models 3467 // an inherently 8-bit immediate operand to these x86 instructions. 3468 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3469 CI->arg_operands().end()); 3470 3471 // Replace the last argument with a trunc. 3472 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 3473 NewCall = Builder.CreateCall(NewFn, Args); 3474 break; 3475 } 3476 3477 case Intrinsic::thread_pointer: { 3478 NewCall = Builder.CreateCall(NewFn, {}); 3479 break; 3480 } 3481 3482 case Intrinsic::invariant_start: 3483 case Intrinsic::invariant_end: 3484 case Intrinsic::masked_load: 3485 case Intrinsic::masked_store: 3486 case Intrinsic::masked_gather: 3487 case Intrinsic::masked_scatter: { 3488 SmallVector<Value *, 4> Args(CI->arg_operands().begin(), 3489 CI->arg_operands().end()); 3490 NewCall = Builder.CreateCall(NewFn, Args); 3491 break; 3492 } 3493 3494 case Intrinsic::memcpy: 3495 case Intrinsic::memmove: 3496 case Intrinsic::memset: { 3497 // We have to make sure that the call signature is what we're expecting. 3498 // We only want to change the old signatures by removing the alignment arg: 3499 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) 3500 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) 3501 // @llvm.memset...(i8*, i8, i[32|64], i32, i1) 3502 // -> @llvm.memset...(i8*, i8, i[32|64], i1) 3503 // Note: i8*'s in the above can be any pointer type 3504 if (CI->getNumArgOperands() != 5) { 3505 DefaultCase(); 3506 return; 3507 } 3508 // Remove alignment argument (3), and add alignment attributes to the 3509 // dest/src pointers. 3510 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), 3511 CI->getArgOperand(2), CI->getArgOperand(4)}; 3512 NewCall = Builder.CreateCall(NewFn, Args); 3513 auto *MemCI = cast<MemIntrinsic>(NewCall); 3514 // All mem intrinsics support dest alignment. 3515 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3)); 3516 MemCI->setDestAlignment(Align->getZExtValue()); 3517 // Memcpy/Memmove also support source alignment. 3518 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI)) 3519 MTI->setSourceAlignment(Align->getZExtValue()); 3520 break; 3521 } 3522 } 3523 assert(NewCall && "Should have either set this variable or returned through " 3524 "the default case"); 3525 std::string Name = CI->getName(); 3526 if (!Name.empty()) { 3527 CI->setName(Name + ".old"); 3528 NewCall->setName(Name); 3529 } 3530 CI->replaceAllUsesWith(NewCall); 3531 CI->eraseFromParent(); 3532 } 3533 3534 void llvm::UpgradeCallsToIntrinsic(Function *F) { 3535 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 3536 3537 // Check if this function should be upgraded and get the replacement function 3538 // if there is one. 3539 Function *NewFn; 3540 if (UpgradeIntrinsicFunction(F, NewFn)) { 3541 // Replace all users of the old function with the new function or new 3542 // instructions. This is not a range loop because the call is deleted. 3543 for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; ) 3544 if (CallInst *CI = dyn_cast<CallInst>(*UI++)) 3545 UpgradeIntrinsicCall(CI, NewFn); 3546 3547 // Remove old function, no longer used, from the module. 3548 F->eraseFromParent(); 3549 } 3550 } 3551 3552 MDNode *llvm::UpgradeTBAANode(MDNode &MD) { 3553 // Check if the tag uses struct-path aware TBAA format. 3554 if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3) 3555 return &MD; 3556 3557 auto &Context = MD.getContext(); 3558 if (MD.getNumOperands() == 3) { 3559 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)}; 3560 MDNode *ScalarType = MDNode::get(Context, Elts); 3561 // Create a MDNode <ScalarType, ScalarType, offset 0, const> 3562 Metadata *Elts2[] = {ScalarType, ScalarType, 3563 ConstantAsMetadata::get( 3564 Constant::getNullValue(Type::getInt64Ty(Context))), 3565 MD.getOperand(2)}; 3566 return MDNode::get(Context, Elts2); 3567 } 3568 // Create a MDNode <MD, MD, offset 0> 3569 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue( 3570 Type::getInt64Ty(Context)))}; 3571 return MDNode::get(Context, Elts); 3572 } 3573 3574 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, 3575 Instruction *&Temp) { 3576 if (Opc != Instruction::BitCast) 3577 return nullptr; 3578 3579 Temp = nullptr; 3580 Type *SrcTy = V->getType(); 3581 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 3582 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 3583 LLVMContext &Context = V->getContext(); 3584 3585 // We have no information about target data layout, so we assume that 3586 // the maximum pointer size is 64bit. 3587 Type *MidTy = Type::getInt64Ty(Context); 3588 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); 3589 3590 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); 3591 } 3592 3593 return nullptr; 3594 } 3595 3596 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { 3597 if (Opc != Instruction::BitCast) 3598 return nullptr; 3599 3600 Type *SrcTy = C->getType(); 3601 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && 3602 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { 3603 LLVMContext &Context = C->getContext(); 3604 3605 // We have no information about target data layout, so we assume that 3606 // the maximum pointer size is 64bit. 3607 Type *MidTy = Type::getInt64Ty(Context); 3608 3609 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), 3610 DestTy); 3611 } 3612 3613 return nullptr; 3614 } 3615 3616 /// Check the debug info version number, if it is out-dated, drop the debug 3617 /// info. Return true if module is modified. 3618 bool llvm::UpgradeDebugInfo(Module &M) { 3619 unsigned Version = getDebugMetadataVersionFromModule(M); 3620 if (Version == DEBUG_METADATA_VERSION) { 3621 bool BrokenDebugInfo = false; 3622 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo)) 3623 report_fatal_error("Broken module found, compilation aborted!"); 3624 if (!BrokenDebugInfo) 3625 // Everything is ok. 3626 return false; 3627 else { 3628 // Diagnose malformed debug info. 3629 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); 3630 M.getContext().diagnose(Diag); 3631 } 3632 } 3633 bool Modified = StripDebugInfo(M); 3634 if (Modified && Version != DEBUG_METADATA_VERSION) { 3635 // Diagnose a version mismatch. 3636 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 3637 M.getContext().diagnose(DiagVersion); 3638 } 3639 return Modified; 3640 } 3641 3642 bool llvm::UpgradeRetainReleaseMarker(Module &M) { 3643 bool Changed = false; 3644 NamedMDNode *ModRetainReleaseMarker = 3645 M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"); 3646 if (ModRetainReleaseMarker) { 3647 MDNode *Op = ModRetainReleaseMarker->getOperand(0); 3648 if (Op) { 3649 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0)); 3650 if (ID) { 3651 SmallVector<StringRef, 4> ValueComp; 3652 ID->getString().split(ValueComp, "#"); 3653 if (ValueComp.size() == 2) { 3654 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); 3655 Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)}; 3656 ModRetainReleaseMarker->setOperand(0, 3657 MDNode::get(M.getContext(), Ops)); 3658 Changed = true; 3659 } 3660 } 3661 } 3662 } 3663 return Changed; 3664 } 3665 3666 bool llvm::UpgradeModuleFlags(Module &M) { 3667 NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 3668 if (!ModFlags) 3669 return false; 3670 3671 bool HasObjCFlag = false, HasClassProperties = false, Changed = false; 3672 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { 3673 MDNode *Op = ModFlags->getOperand(I); 3674 if (Op->getNumOperands() != 3) 3675 continue; 3676 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1)); 3677 if (!ID) 3678 continue; 3679 if (ID->getString() == "Objective-C Image Info Version") 3680 HasObjCFlag = true; 3681 if (ID->getString() == "Objective-C Class Properties") 3682 HasClassProperties = true; 3683 // Upgrade PIC/PIE Module Flags. The module flag behavior for these two 3684 // field was Error and now they are Max. 3685 if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") { 3686 if (auto *Behavior = 3687 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) { 3688 if (Behavior->getLimitedValue() == Module::Error) { 3689 Type *Int32Ty = Type::getInt32Ty(M.getContext()); 3690 Metadata *Ops[3] = { 3691 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)), 3692 MDString::get(M.getContext(), ID->getString()), 3693 Op->getOperand(2)}; 3694 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 3695 Changed = true; 3696 } 3697 } 3698 } 3699 // Upgrade Objective-C Image Info Section. Removed the whitespce in the 3700 // section name so that llvm-lto will not complain about mismatching 3701 // module flags that is functionally the same. 3702 if (ID->getString() == "Objective-C Image Info Section") { 3703 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) { 3704 SmallVector<StringRef, 4> ValueComp; 3705 Value->getString().split(ValueComp, " "); 3706 if (ValueComp.size() != 1) { 3707 std::string NewValue; 3708 for (auto &S : ValueComp) 3709 NewValue += S.str(); 3710 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1), 3711 MDString::get(M.getContext(), NewValue)}; 3712 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); 3713 Changed = true; 3714 } 3715 } 3716 } 3717 } 3718 3719 // "Objective-C Class Properties" is recently added for Objective-C. We 3720 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module 3721 // flag of value 0, so we can correclty downgrade this flag when trying to 3722 // link an ObjC bitcode without this module flag with an ObjC bitcode with 3723 // this module flag. 3724 if (HasObjCFlag && !HasClassProperties) { 3725 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties", 3726 (uint32_t)0); 3727 Changed = true; 3728 } 3729 3730 return Changed; 3731 } 3732 3733 void llvm::UpgradeSectionAttributes(Module &M) { 3734 auto TrimSpaces = [](StringRef Section) -> std::string { 3735 SmallVector<StringRef, 5> Components; 3736 Section.split(Components, ','); 3737 3738 SmallString<32> Buffer; 3739 raw_svector_ostream OS(Buffer); 3740 3741 for (auto Component : Components) 3742 OS << ',' << Component.trim(); 3743 3744 return OS.str().substr(1); 3745 }; 3746 3747 for (auto &GV : M.globals()) { 3748 if (!GV.hasSection()) 3749 continue; 3750 3751 StringRef Section = GV.getSection(); 3752 3753 if (!Section.startswith("__DATA, __objc_catlist")) 3754 continue; 3755 3756 // __DATA, __objc_catlist, regular, no_dead_strip 3757 // __DATA,__objc_catlist,regular,no_dead_strip 3758 GV.setSection(TrimSpaces(Section)); 3759 } 3760 } 3761 3762 static bool isOldLoopArgument(Metadata *MD) { 3763 auto *T = dyn_cast_or_null<MDTuple>(MD); 3764 if (!T) 3765 return false; 3766 if (T->getNumOperands() < 1) 3767 return false; 3768 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 3769 if (!S) 3770 return false; 3771 return S->getString().startswith("llvm.vectorizer."); 3772 } 3773 3774 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 3775 StringRef OldPrefix = "llvm.vectorizer."; 3776 assert(OldTag.startswith(OldPrefix) && "Expected old prefix"); 3777 3778 if (OldTag == "llvm.vectorizer.unroll") 3779 return MDString::get(C, "llvm.loop.interleave.count"); 3780 3781 return MDString::get( 3782 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 3783 .str()); 3784 } 3785 3786 static Metadata *upgradeLoopArgument(Metadata *MD) { 3787 auto *T = dyn_cast_or_null<MDTuple>(MD); 3788 if (!T) 3789 return MD; 3790 if (T->getNumOperands() < 1) 3791 return MD; 3792 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 3793 if (!OldTag) 3794 return MD; 3795 if (!OldTag->getString().startswith("llvm.vectorizer.")) 3796 return MD; 3797 3798 // This has an old tag. Upgrade it. 3799 SmallVector<Metadata *, 8> Ops; 3800 Ops.reserve(T->getNumOperands()); 3801 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 3802 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 3803 Ops.push_back(T->getOperand(I)); 3804 3805 return MDTuple::get(T->getContext(), Ops); 3806 } 3807 3808 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 3809 auto *T = dyn_cast<MDTuple>(&N); 3810 if (!T) 3811 return &N; 3812 3813 if (none_of(T->operands(), isOldLoopArgument)) 3814 return &N; 3815 3816 SmallVector<Metadata *, 8> Ops; 3817 Ops.reserve(T->getNumOperands()); 3818 for (Metadata *MD : T->operands()) 3819 Ops.push_back(upgradeLoopArgument(MD)); 3820 3821 return MDTuple::get(T->getContext(), Ops); 3822 } 3823