1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Verifier.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
31 #include <cstring>
32 using namespace llvm;
33 
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
35 
36 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
37 // changed their type from v4f32 to v2i64.
38 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
39                                   Function *&NewFn) {
40   // Check whether this is an old version of the function, which received
41   // v4f32 arguments.
42   Type *Arg0Type = F->getFunctionType()->getParamType(0);
43   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44     return false;
45 
46   // Yes, it's old, replace it with new version.
47   rename(F);
48   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49   return true;
50 }
51 
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
54 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
55                                              Function *&NewFn) {
56   // Check that the last argument is an i32.
57   Type *LastArgType = F->getFunctionType()->getParamType(
58      F->getFunctionType()->getNumParams() - 1);
59   if (!LastArgType->isIntegerTy(32))
60     return false;
61 
62   // Move this function aside and map down.
63   rename(F);
64   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65   return true;
66 }
67 
68 static bool UpgradeADCSBBIntrinsic(Function *F, Intrinsic::ID IID,
69                                    Function *&NewFn) {
70   // If this intrinsic has 3 operands, it's the new version.
71   if (F->getFunctionType()->getNumParams() == 3)
72     return false;
73 
74   rename(F);
75   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
76   return true;
77 }
78 
79 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
80   // All of the intrinsics matches below should be marked with which llvm
81   // version started autoupgrading them. At some point in the future we would
82   // like to use this information to remove upgrade code for some older
83   // intrinsics. It is currently undecided how we will determine that future
84   // point.
85   if (Name.startswith("sse2.paddus.") || // Added in 8.0
86       Name.startswith("sse2.psubus.") || // Added in 8.0
87       Name.startswith("avx2.paddus.") || // Added in 8.0
88       Name.startswith("avx2.psubus.") || // Added in 8.0
89       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
90       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
91       Name=="ssse3.pabs.b.128" || // Added in 6.0
92       Name=="ssse3.pabs.w.128" || // Added in 6.0
93       Name=="ssse3.pabs.d.128" || // Added in 6.0
94       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
95       Name.startswith("fma.vfmadd.") || // Added in 7.0
96       Name.startswith("fma.vfmsub.") || // Added in 7.0
97       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
98       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
99       Name.startswith("fma.vfnmadd.") || // Added in 7.0
100       Name.startswith("fma.vfnmsub.") || // Added in 7.0
101       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
102       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
103       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
104       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
105       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
106       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
107       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
108       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
109       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
110       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
111       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
112       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
113       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
114       Name.startswith("avx512.kunpck") || //added in 6.0
115       Name.startswith("avx2.pabs.") || // Added in 6.0
116       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
117       Name.startswith("avx512.broadcastm") || // Added in 6.0
118       Name == "sse.sqrt.ss" || // Added in 7.0
119       Name == "sse2.sqrt.sd" || // Added in 7.0
120       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
121       Name.startswith("avx.sqrt.p") || // Added in 7.0
122       Name.startswith("sse2.sqrt.p") || // Added in 7.0
123       Name.startswith("sse.sqrt.p") || // Added in 7.0
124       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
125       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
126       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
127       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
128       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
129       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
130       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
131       Name.startswith("avx.vperm2f128.") || // Added in 6.0
132       Name == "avx2.vperm2i128" || // Added in 6.0
133       Name == "sse.add.ss" || // Added in 4.0
134       Name == "sse2.add.sd" || // Added in 4.0
135       Name == "sse.sub.ss" || // Added in 4.0
136       Name == "sse2.sub.sd" || // Added in 4.0
137       Name == "sse.mul.ss" || // Added in 4.0
138       Name == "sse2.mul.sd" || // Added in 4.0
139       Name == "sse.div.ss" || // Added in 4.0
140       Name == "sse2.div.sd" || // Added in 4.0
141       Name == "sse41.pmaxsb" || // Added in 3.9
142       Name == "sse2.pmaxs.w" || // Added in 3.9
143       Name == "sse41.pmaxsd" || // Added in 3.9
144       Name == "sse2.pmaxu.b" || // Added in 3.9
145       Name == "sse41.pmaxuw" || // Added in 3.9
146       Name == "sse41.pmaxud" || // Added in 3.9
147       Name == "sse41.pminsb" || // Added in 3.9
148       Name == "sse2.pmins.w" || // Added in 3.9
149       Name == "sse41.pminsd" || // Added in 3.9
150       Name == "sse2.pminu.b" || // Added in 3.9
151       Name == "sse41.pminuw" || // Added in 3.9
152       Name == "sse41.pminud" || // Added in 3.9
153       Name == "avx512.kand.w" || // Added in 7.0
154       Name == "avx512.kandn.w" || // Added in 7.0
155       Name == "avx512.knot.w" || // Added in 7.0
156       Name == "avx512.kor.w" || // Added in 7.0
157       Name == "avx512.kxor.w" || // Added in 7.0
158       Name == "avx512.kxnor.w" || // Added in 7.0
159       Name == "avx512.kortestc.w" || // Added in 7.0
160       Name == "avx512.kortestz.w" || // Added in 7.0
161       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
162       Name.startswith("avx2.pmax") || // Added in 3.9
163       Name.startswith("avx2.pmin") || // Added in 3.9
164       Name.startswith("avx512.mask.pmax") || // Added in 4.0
165       Name.startswith("avx512.mask.pmin") || // Added in 4.0
166       Name.startswith("avx2.vbroadcast") || // Added in 3.8
167       Name.startswith("avx2.pbroadcast") || // Added in 3.8
168       Name.startswith("avx.vpermil.") || // Added in 3.1
169       Name.startswith("sse2.pshuf") || // Added in 3.9
170       Name.startswith("avx512.pbroadcast") || // Added in 3.9
171       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
172       Name.startswith("avx512.mask.movddup") || // Added in 3.9
173       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
174       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
175       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
176       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
177       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
178       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
179       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
180       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
181       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
182       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
183       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
184       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
185       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
186       Name.startswith("avx512.mask.pand.") || // Added in 3.9
187       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
188       Name.startswith("avx512.mask.por.") || // Added in 3.9
189       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
190       Name.startswith("avx512.mask.and.") || // Added in 3.9
191       Name.startswith("avx512.mask.andn.") || // Added in 3.9
192       Name.startswith("avx512.mask.or.") || // Added in 3.9
193       Name.startswith("avx512.mask.xor.") || // Added in 3.9
194       Name.startswith("avx512.mask.padd.") || // Added in 4.0
195       Name.startswith("avx512.mask.psub.") || // Added in 4.0
196       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
197       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
198       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
199       Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
200       Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
201       Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
202       Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
203       Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
204       Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
205       Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
206       Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
207       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
208       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
209       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
210       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
211       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
212       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
213       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
214       Name == "avx512.cvtusi2sd" || // Added in 7.0
215       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
216       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
217       Name == "sse2.pmulu.dq" || // Added in 7.0
218       Name == "sse41.pmuldq" || // Added in 7.0
219       Name == "avx2.pmulu.dq" || // Added in 7.0
220       Name == "avx2.pmul.dq" || // Added in 7.0
221       Name == "avx512.pmulu.dq.512" || // Added in 7.0
222       Name == "avx512.pmul.dq.512" || // Added in 7.0
223       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
224       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
225       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
226       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
227       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
228       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
229       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
230       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
231       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
232       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
233       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
234       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
235       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
236       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
237       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
238       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
239       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
240       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
241       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
242       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
243       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
244       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
245       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
246       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
247       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
248       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
249       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
250       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
251       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
252       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
253       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
254       Name.startswith("avx512.mask.pslli") || // Added in 4.0
255       Name.startswith("avx512.mask.psrai") || // Added in 4.0
256       Name.startswith("avx512.mask.psrli") || // Added in 4.0
257       Name.startswith("avx512.mask.psllv") || // Added in 4.0
258       Name.startswith("avx512.mask.psrav") || // Added in 4.0
259       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
260       Name.startswith("sse41.pmovsx") || // Added in 3.8
261       Name.startswith("sse41.pmovzx") || // Added in 3.9
262       Name.startswith("avx2.pmovsx") || // Added in 3.9
263       Name.startswith("avx2.pmovzx") || // Added in 3.9
264       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
265       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
266       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
267       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
268       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
269       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
270       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
271       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
272       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
273       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
274       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
275       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
276       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
277       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
278       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
279       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
280       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
281       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
282       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
283       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
284       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
285       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
286       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
287       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
288       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
289       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
290       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
291       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
292       Name.startswith("avx512.mask.prorv.") || // Added in 7.0
293       Name.startswith("avx512.mask.pror.") || // Added in 7.0
294       Name.startswith("avx512.mask.prolv.") || // Added in 7.0
295       Name.startswith("avx512.mask.prol.") || // Added in 7.0
296       Name.startswith("avx512.mask.padds.") || // Added in 8.0
297       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
298       Name == "sse.cvtsi2ss" || // Added in 7.0
299       Name == "sse.cvtsi642ss" || // Added in 7.0
300       Name == "sse2.cvtsi2sd" || // Added in 7.0
301       Name == "sse2.cvtsi642sd" || // Added in 7.0
302       Name == "sse2.cvtss2sd" || // Added in 7.0
303       Name == "sse2.cvtdq2pd" || // Added in 3.9
304       Name == "sse2.cvtdq2ps" || // Added in 7.0
305       Name == "sse2.cvtps2pd" || // Added in 3.9
306       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
307       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
308       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
309       Name.startswith("avx.vinsertf128.") || // Added in 3.7
310       Name == "avx2.vinserti128" || // Added in 3.7
311       Name.startswith("avx512.mask.insert") || // Added in 4.0
312       Name.startswith("avx.vextractf128.") || // Added in 3.7
313       Name == "avx2.vextracti128" || // Added in 3.7
314       Name.startswith("avx512.mask.vextract") || // Added in 4.0
315       Name.startswith("sse4a.movnt.") || // Added in 3.9
316       Name.startswith("avx.movnt.") || // Added in 3.2
317       Name.startswith("avx512.storent.") || // Added in 3.9
318       Name == "sse41.movntdqa" || // Added in 5.0
319       Name == "avx2.movntdqa" || // Added in 5.0
320       Name == "avx512.movntdqa" || // Added in 5.0
321       Name == "sse2.storel.dq" || // Added in 3.9
322       Name.startswith("sse.storeu.") || // Added in 3.9
323       Name.startswith("sse2.storeu.") || // Added in 3.9
324       Name.startswith("avx.storeu.") || // Added in 3.9
325       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
326       Name.startswith("avx512.mask.store.p") || // Added in 3.9
327       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
328       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
329       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
330       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
331       Name == "avx512.mask.store.ss" || // Added in 7.0
332       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
333       Name.startswith("avx512.mask.load.") || // Added in 3.9
334       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
335       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
336       Name == "sse42.crc32.64.8" || // Added in 3.4
337       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
338       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
339       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
340       Name.startswith("avx512.mask.valign.") || // Added in 4.0
341       Name.startswith("sse2.psll.dq") || // Added in 3.7
342       Name.startswith("sse2.psrl.dq") || // Added in 3.7
343       Name.startswith("avx2.psll.dq") || // Added in 3.7
344       Name.startswith("avx2.psrl.dq") || // Added in 3.7
345       Name.startswith("avx512.psll.dq") || // Added in 3.9
346       Name.startswith("avx512.psrl.dq") || // Added in 3.9
347       Name == "sse41.pblendw" || // Added in 3.7
348       Name.startswith("sse41.blendp") || // Added in 3.7
349       Name.startswith("avx.blend.p") || // Added in 3.7
350       Name == "avx2.pblendw" || // Added in 3.7
351       Name.startswith("avx2.pblendd.") || // Added in 3.7
352       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
353       Name == "avx2.vbroadcasti128" || // Added in 3.7
354       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
355       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
356       Name == "xop.vpcmov" || // Added in 3.8
357       Name == "xop.vpcmov.256" || // Added in 5.0
358       Name.startswith("avx512.mask.move.s") || // Added in 4.0
359       Name.startswith("avx512.cvtmask2") || // Added in 5.0
360       (Name.startswith("xop.vpcom") && // Added in 3.2
361        F->arg_size() == 2) ||
362       Name.startswith("avx512.ptestm") || //Added in 6.0
363       Name.startswith("avx512.ptestnm") || //Added in 6.0
364       Name.startswith("sse2.pavg") || // Added in 6.0
365       Name.startswith("avx2.pavg") || // Added in 6.0
366       Name.startswith("avx512.mask.pavg")) // Added in 6.0
367     return true;
368 
369   return false;
370 }
371 
372 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
373                                         Function *&NewFn) {
374   // Only handle intrinsics that start with "x86.".
375   if (!Name.startswith("x86."))
376     return false;
377   // Remove "x86." prefix.
378   Name = Name.substr(4);
379 
380   if (ShouldUpgradeX86Intrinsic(F, Name)) {
381     NewFn = nullptr;
382     return true;
383   }
384 
385   if (Name == "addcarryx.u32")
386     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u32, NewFn);
387   if (Name == "addcarryx.u64")
388     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u64, NewFn);
389   if (Name == "addcarry.u32")
390     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u32, NewFn);
391   if (Name == "addcarry.u64")
392     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u64, NewFn);
393   if (Name == "subborrow.u32")
394     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u32, NewFn);
395   if (Name == "subborrow.u64")
396     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);
397 
398   if (Name == "rdtscp") {
399     // If this intrinsic has 0 operands, it's the new version.
400     if (F->getFunctionType()->getNumParams() == 0)
401       return false;
402 
403     rename(F);
404     NewFn = Intrinsic::getDeclaration(F->getParent(),
405                                       Intrinsic::x86_rdtscp);
406     return true;
407   }
408 
409   // SSE4.1 ptest functions may have an old signature.
410   if (Name.startswith("sse41.ptest")) { // Added in 3.2
411     if (Name.substr(11) == "c")
412       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
413     if (Name.substr(11) == "z")
414       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
415     if (Name.substr(11) == "nzc")
416       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
417   }
418   // Several blend and other instructions with masks used the wrong number of
419   // bits.
420   if (Name == "sse41.insertps") // Added in 3.6
421     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
422                                             NewFn);
423   if (Name == "sse41.dppd") // Added in 3.6
424     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
425                                             NewFn);
426   if (Name == "sse41.dpps") // Added in 3.6
427     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
428                                             NewFn);
429   if (Name == "sse41.mpsadbw") // Added in 3.6
430     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
431                                             NewFn);
432   if (Name == "avx.dp.ps.256") // Added in 3.6
433     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
434                                             NewFn);
435   if (Name == "avx2.mpsadbw") // Added in 3.6
436     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
437                                             NewFn);
438 
439   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
440   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
441     rename(F);
442     NewFn = Intrinsic::getDeclaration(F->getParent(),
443                                       Intrinsic::x86_xop_vfrcz_ss);
444     return true;
445   }
446   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
447     rename(F);
448     NewFn = Intrinsic::getDeclaration(F->getParent(),
449                                       Intrinsic::x86_xop_vfrcz_sd);
450     return true;
451   }
452   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
453   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
454     auto Idx = F->getFunctionType()->getParamType(2);
455     if (Idx->isFPOrFPVectorTy()) {
456       rename(F);
457       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
458       unsigned EltSize = Idx->getScalarSizeInBits();
459       Intrinsic::ID Permil2ID;
460       if (EltSize == 64 && IdxSize == 128)
461         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
462       else if (EltSize == 32 && IdxSize == 128)
463         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
464       else if (EltSize == 64 && IdxSize == 256)
465         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
466       else
467         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
468       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
469       return true;
470     }
471   }
472 
473   return false;
474 }
475 
476 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
477   assert(F && "Illegal to upgrade a non-existent Function.");
478 
479   // Quickly eliminate it, if it's not a candidate.
480   StringRef Name = F->getName();
481   if (Name.size() <= 8 || !Name.startswith("llvm."))
482     return false;
483   Name = Name.substr(5); // Strip off "llvm."
484 
485   switch (Name[0]) {
486   default: break;
487   case 'a': {
488     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
489       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
490                                         F->arg_begin()->getType());
491       return true;
492     }
493     if (Name.startswith("arm.neon.vclz")) {
494       Type* args[2] = {
495         F->arg_begin()->getType(),
496         Type::getInt1Ty(F->getContext())
497       };
498       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
499       // the end of the name. Change name from llvm.arm.neon.vclz.* to
500       //  llvm.ctlz.*
501       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
502       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
503                                "llvm.ctlz." + Name.substr(14), F->getParent());
504       return true;
505     }
506     if (Name.startswith("arm.neon.vcnt")) {
507       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
508                                         F->arg_begin()->getType());
509       return true;
510     }
511     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
512     if (vldRegex.match(Name)) {
513       auto fArgs = F->getFunctionType()->params();
514       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
515       // Can't use Intrinsic::getDeclaration here as the return types might
516       // then only be structurally equal.
517       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
518       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
519                                "llvm." + Name + ".p0i8", F->getParent());
520       return true;
521     }
522     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
523     if (vstRegex.match(Name)) {
524       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
525                                                 Intrinsic::arm_neon_vst2,
526                                                 Intrinsic::arm_neon_vst3,
527                                                 Intrinsic::arm_neon_vst4};
528 
529       static const Intrinsic::ID StoreLaneInts[] = {
530         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
531         Intrinsic::arm_neon_vst4lane
532       };
533 
534       auto fArgs = F->getFunctionType()->params();
535       Type *Tys[] = {fArgs[0], fArgs[1]};
536       if (Name.find("lane") == StringRef::npos)
537         NewFn = Intrinsic::getDeclaration(F->getParent(),
538                                           StoreInts[fArgs.size() - 3], Tys);
539       else
540         NewFn = Intrinsic::getDeclaration(F->getParent(),
541                                           StoreLaneInts[fArgs.size() - 5], Tys);
542       return true;
543     }
544     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
545       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
546       return true;
547     }
548     break;
549   }
550 
551   case 'c': {
552     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
553       rename(F);
554       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
555                                         F->arg_begin()->getType());
556       return true;
557     }
558     if (Name.startswith("cttz.") && F->arg_size() == 1) {
559       rename(F);
560       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
561                                         F->arg_begin()->getType());
562       return true;
563     }
564     break;
565   }
566   case 'd': {
567     if (Name == "dbg.value" && F->arg_size() == 4) {
568       rename(F);
569       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
570       return true;
571     }
572     break;
573   }
574   case 'i':
575   case 'l': {
576     bool IsLifetimeStart = Name.startswith("lifetime.start");
577     if (IsLifetimeStart || Name.startswith("invariant.start")) {
578       Intrinsic::ID ID = IsLifetimeStart ?
579         Intrinsic::lifetime_start : Intrinsic::invariant_start;
580       auto Args = F->getFunctionType()->params();
581       Type* ObjectPtr[1] = {Args[1]};
582       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
583         rename(F);
584         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
585         return true;
586       }
587     }
588 
589     bool IsLifetimeEnd = Name.startswith("lifetime.end");
590     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
591       Intrinsic::ID ID = IsLifetimeEnd ?
592         Intrinsic::lifetime_end : Intrinsic::invariant_end;
593 
594       auto Args = F->getFunctionType()->params();
595       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
596       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
597         rename(F);
598         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
599         return true;
600       }
601     }
602     if (Name.startswith("invariant.group.barrier")) {
603       // Rename invariant.group.barrier to launder.invariant.group
604       auto Args = F->getFunctionType()->params();
605       Type* ObjectPtr[1] = {Args[0]};
606       rename(F);
607       NewFn = Intrinsic::getDeclaration(F->getParent(),
608           Intrinsic::launder_invariant_group, ObjectPtr);
609       return true;
610 
611     }
612 
613     break;
614   }
615   case 'm': {
616     if (Name.startswith("masked.load.")) {
617       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
618       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
619         rename(F);
620         NewFn = Intrinsic::getDeclaration(F->getParent(),
621                                           Intrinsic::masked_load,
622                                           Tys);
623         return true;
624       }
625     }
626     if (Name.startswith("masked.store.")) {
627       auto Args = F->getFunctionType()->params();
628       Type *Tys[] = { Args[0], Args[1] };
629       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
630         rename(F);
631         NewFn = Intrinsic::getDeclaration(F->getParent(),
632                                           Intrinsic::masked_store,
633                                           Tys);
634         return true;
635       }
636     }
637     // Renaming gather/scatter intrinsics with no address space overloading
638     // to the new overload which includes an address space
639     if (Name.startswith("masked.gather.")) {
640       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
641       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
642         rename(F);
643         NewFn = Intrinsic::getDeclaration(F->getParent(),
644                                           Intrinsic::masked_gather, Tys);
645         return true;
646       }
647     }
648     if (Name.startswith("masked.scatter.")) {
649       auto Args = F->getFunctionType()->params();
650       Type *Tys[] = {Args[0], Args[1]};
651       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
652         rename(F);
653         NewFn = Intrinsic::getDeclaration(F->getParent(),
654                                           Intrinsic::masked_scatter, Tys);
655         return true;
656       }
657     }
658     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
659     // alignment parameter to embedding the alignment as an attribute of
660     // the pointer args.
661     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
662       rename(F);
663       // Get the types of dest, src, and len
664       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
665       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
666                                         ParamTypes);
667       return true;
668     }
669     if (Name.startswith("memmove.") && F->arg_size() == 5) {
670       rename(F);
671       // Get the types of dest, src, and len
672       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
673       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
674                                         ParamTypes);
675       return true;
676     }
677     if (Name.startswith("memset.") && F->arg_size() == 5) {
678       rename(F);
679       // Get the types of dest, and len
680       const auto *FT = F->getFunctionType();
681       Type *ParamTypes[2] = {
682           FT->getParamType(0), // Dest
683           FT->getParamType(2)  // len
684       };
685       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
686                                         ParamTypes);
687       return true;
688     }
689     break;
690   }
691   case 'n': {
692     if (Name.startswith("nvvm.")) {
693       Name = Name.substr(5);
694 
695       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
696       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
697                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
698                               .Case("clz.i", Intrinsic::ctlz)
699                               .Case("popc.i", Intrinsic::ctpop)
700                               .Default(Intrinsic::not_intrinsic);
701       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
702         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
703                                           {F->getReturnType()});
704         return true;
705       }
706 
707       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
708       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
709       //
710       // TODO: We could add lohi.i2d.
711       bool Expand = StringSwitch<bool>(Name)
712                         .Cases("abs.i", "abs.ll", true)
713                         .Cases("clz.ll", "popc.ll", "h2f", true)
714                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
715                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
716                         .Default(false);
717       if (Expand) {
718         NewFn = nullptr;
719         return true;
720       }
721     }
722     break;
723   }
724   case 'o':
725     // We only need to change the name to match the mangling including the
726     // address space.
727     if (Name.startswith("objectsize.")) {
728       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
729       if (F->arg_size() == 2 ||
730           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
731         rename(F);
732         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
733                                           Tys);
734         return true;
735       }
736     }
737     break;
738 
739   case 's':
740     if (Name == "stackprotectorcheck") {
741       NewFn = nullptr;
742       return true;
743     }
744     break;
745 
746   case 'x':
747     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
748       return true;
749   }
750   // Remangle our intrinsic since we upgrade the mangling
751   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
752   if (Result != None) {
753     NewFn = Result.getValue();
754     return true;
755   }
756 
757   //  This may not belong here. This function is effectively being overloaded
758   //  to both detect an intrinsic which needs upgrading, and to provide the
759   //  upgraded form of the intrinsic. We should perhaps have two separate
760   //  functions for this.
761   return false;
762 }
763 
764 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
765   NewFn = nullptr;
766   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
767   assert(F != NewFn && "Intrinsic function upgraded to the same function");
768 
769   // Upgrade intrinsic attributes.  This does not change the function.
770   if (NewFn)
771     F = NewFn;
772   if (Intrinsic::ID id = F->getIntrinsicID())
773     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
774   return Upgraded;
775 }
776 
777 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
778   // Nothing to do yet.
779   return false;
780 }
781 
782 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
783 // to byte shuffles.
784 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
785                                          Value *Op, unsigned Shift) {
786   Type *ResultTy = Op->getType();
787   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
788 
789   // Bitcast from a 64-bit element type to a byte element type.
790   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
791   Op = Builder.CreateBitCast(Op, VecTy, "cast");
792 
793   // We'll be shuffling in zeroes.
794   Value *Res = Constant::getNullValue(VecTy);
795 
796   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
797   // we'll just return the zero vector.
798   if (Shift < 16) {
799     uint32_t Idxs[64];
800     // 256/512-bit version is split into 2/4 16-byte lanes.
801     for (unsigned l = 0; l != NumElts; l += 16)
802       for (unsigned i = 0; i != 16; ++i) {
803         unsigned Idx = NumElts + i - Shift;
804         if (Idx < NumElts)
805           Idx -= NumElts - 16; // end of lane, switch operand.
806         Idxs[l + i] = Idx + l;
807       }
808 
809     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
810   }
811 
812   // Bitcast back to a 64-bit element type.
813   return Builder.CreateBitCast(Res, ResultTy, "cast");
814 }
815 
816 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
817 // to byte shuffles.
818 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
819                                          unsigned Shift) {
820   Type *ResultTy = Op->getType();
821   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
822 
823   // Bitcast from a 64-bit element type to a byte element type.
824   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
825   Op = Builder.CreateBitCast(Op, VecTy, "cast");
826 
827   // We'll be shuffling in zeroes.
828   Value *Res = Constant::getNullValue(VecTy);
829 
830   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
831   // we'll just return the zero vector.
832   if (Shift < 16) {
833     uint32_t Idxs[64];
834     // 256/512-bit version is split into 2/4 16-byte lanes.
835     for (unsigned l = 0; l != NumElts; l += 16)
836       for (unsigned i = 0; i != 16; ++i) {
837         unsigned Idx = i + Shift;
838         if (Idx >= 16)
839           Idx += NumElts - 16; // end of lane, switch operand.
840         Idxs[l + i] = Idx + l;
841       }
842 
843     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
844   }
845 
846   // Bitcast back to a 64-bit element type.
847   return Builder.CreateBitCast(Res, ResultTy, "cast");
848 }
849 
850 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
851                             unsigned NumElts) {
852   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
853                              cast<IntegerType>(Mask->getType())->getBitWidth());
854   Mask = Builder.CreateBitCast(Mask, MaskTy);
855 
856   // If we have less than 8 elements, then the starting mask was an i8 and
857   // we need to extract down to the right number of elements.
858   if (NumElts < 8) {
859     uint32_t Indices[4];
860     for (unsigned i = 0; i != NumElts; ++i)
861       Indices[i] = i;
862     Mask = Builder.CreateShuffleVector(Mask, Mask,
863                                        makeArrayRef(Indices, NumElts),
864                                        "extract");
865   }
866 
867   return Mask;
868 }
869 
870 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
871                             Value *Op0, Value *Op1) {
872   // If the mask is all ones just emit the first operation.
873   if (const auto *C = dyn_cast<Constant>(Mask))
874     if (C->isAllOnesValue())
875       return Op0;
876 
877   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
878   return Builder.CreateSelect(Mask, Op0, Op1);
879 }
880 
881 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
882                                   Value *Op0, Value *Op1) {
883   // If the mask is all ones just emit the first operation.
884   if (const auto *C = dyn_cast<Constant>(Mask))
885     if (C->isAllOnesValue())
886       return Op0;
887 
888   llvm::VectorType *MaskTy =
889     llvm::VectorType::get(Builder.getInt1Ty(),
890                           Mask->getType()->getIntegerBitWidth());
891   Mask = Builder.CreateBitCast(Mask, MaskTy);
892   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
893   return Builder.CreateSelect(Mask, Op0, Op1);
894 }
895 
896 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
897 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
898 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
899 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
900                                         Value *Op1, Value *Shift,
901                                         Value *Passthru, Value *Mask,
902                                         bool IsVALIGN) {
903   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
904 
905   unsigned NumElts = Op0->getType()->getVectorNumElements();
906   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
907   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
908   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
909 
910   // Mask the immediate for VALIGN.
911   if (IsVALIGN)
912     ShiftVal &= (NumElts - 1);
913 
914   // If palignr is shifting the pair of vectors more than the size of two
915   // lanes, emit zero.
916   if (ShiftVal >= 32)
917     return llvm::Constant::getNullValue(Op0->getType());
918 
919   // If palignr is shifting the pair of input vectors more than one lane,
920   // but less than two lanes, convert to shifting in zeroes.
921   if (ShiftVal > 16) {
922     ShiftVal -= 16;
923     Op1 = Op0;
924     Op0 = llvm::Constant::getNullValue(Op0->getType());
925   }
926 
927   uint32_t Indices[64];
928   // 256-bit palignr operates on 128-bit lanes so we need to handle that
929   for (unsigned l = 0; l < NumElts; l += 16) {
930     for (unsigned i = 0; i != 16; ++i) {
931       unsigned Idx = ShiftVal + i;
932       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
933         Idx += NumElts - 16; // End of lane, switch operand.
934       Indices[l + i] = Idx + l;
935     }
936   }
937 
938   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
939                                              makeArrayRef(Indices, NumElts),
940                                              "palignr");
941 
942   return EmitX86Select(Builder, Mask, Align, Passthru);
943 }
944 
945 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
946                                             bool IsAddition) {
947   Value *Op0 = CI.getOperand(0);
948   Value *Op1 = CI.getOperand(1);
949 
950   // Collect vector elements and type data.
951   Type *ResultType = CI.getType();
952 
953   Value *Res;
954   if (IsAddition) {
955     // ADDUS: a > (a+b) ? ~0 : (a+b)
956     // If Op0 > Add, overflow occured.
957     Value *Add = Builder.CreateAdd(Op0, Op1);
958     Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Add);
959     Value *Max = llvm::Constant::getAllOnesValue(ResultType);
960     Res = Builder.CreateSelect(ICmp, Max, Add);
961   } else {
962     // SUBUS: max(a, b) - b
963     Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
964     Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
965     Res = Builder.CreateSub(Select, Op1);
966   }
967 
968   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
969     Value *VecSrc = CI.getOperand(2);
970     Value *Mask = CI.getOperand(3);
971     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
972   }
973   return Res;
974 }
975 
976 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
977                                  Value *Ptr, Value *Data, Value *Mask,
978                                  bool Aligned) {
979   // Cast the pointer to the right type.
980   Ptr = Builder.CreateBitCast(Ptr,
981                               llvm::PointerType::getUnqual(Data->getType()));
982   unsigned Align =
983     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
984 
985   // If the mask is all ones just emit a regular store.
986   if (const auto *C = dyn_cast<Constant>(Mask))
987     if (C->isAllOnesValue())
988       return Builder.CreateAlignedStore(Data, Ptr, Align);
989 
990   // Convert the mask from an integer type to a vector of i1.
991   unsigned NumElts = Data->getType()->getVectorNumElements();
992   Mask = getX86MaskVec(Builder, Mask, NumElts);
993   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
994 }
995 
996 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
997                                 Value *Ptr, Value *Passthru, Value *Mask,
998                                 bool Aligned) {
999   // Cast the pointer to the right type.
1000   Ptr = Builder.CreateBitCast(Ptr,
1001                              llvm::PointerType::getUnqual(Passthru->getType()));
1002   unsigned Align =
1003     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1004 
1005   // If the mask is all ones just emit a regular store.
1006   if (const auto *C = dyn_cast<Constant>(Mask))
1007     if (C->isAllOnesValue())
1008       return Builder.CreateAlignedLoad(Ptr, Align);
1009 
1010   // Convert the mask from an integer type to a vector of i1.
1011   unsigned NumElts = Passthru->getType()->getVectorNumElements();
1012   Mask = getX86MaskVec(Builder, Mask, NumElts);
1013   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1014 }
1015 
1016 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1017   Value *Op0 = CI.getArgOperand(0);
1018   llvm::Type *Ty = Op0->getType();
1019   Value *Zero = llvm::Constant::getNullValue(Ty);
1020   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1021   Value *Neg = Builder.CreateNeg(Op0);
1022   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1023 
1024   if (CI.getNumArgOperands() == 3)
1025     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1026 
1027   return Res;
1028 }
1029 
1030 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1031                                ICmpInst::Predicate Pred) {
1032   Value *Op0 = CI.getArgOperand(0);
1033   Value *Op1 = CI.getArgOperand(1);
1034   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1035   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1036 
1037   if (CI.getNumArgOperands() == 4)
1038     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1039 
1040   return Res;
1041 }
1042 
1043 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1044   Type *Ty = CI.getType();
1045 
1046   // Arguments have a vXi32 type so cast to vXi64.
1047   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1048   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1049 
1050   if (IsSigned) {
1051     // Shift left then arithmetic shift right.
1052     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1053     LHS = Builder.CreateShl(LHS, ShiftAmt);
1054     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1055     RHS = Builder.CreateShl(RHS, ShiftAmt);
1056     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1057   } else {
1058     // Clear the upper bits.
1059     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1060     LHS = Builder.CreateAnd(LHS, Mask);
1061     RHS = Builder.CreateAnd(RHS, Mask);
1062   }
1063 
1064   Value *Res = Builder.CreateMul(LHS, RHS);
1065 
1066   if (CI.getNumArgOperands() == 4)
1067     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1068 
1069   return Res;
1070 }
1071 
1072 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1073 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1074                                      Value *Mask) {
1075   unsigned NumElts = Vec->getType()->getVectorNumElements();
1076   if (Mask) {
1077     const auto *C = dyn_cast<Constant>(Mask);
1078     if (!C || !C->isAllOnesValue())
1079       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1080   }
1081 
1082   if (NumElts < 8) {
1083     uint32_t Indices[8];
1084     for (unsigned i = 0; i != NumElts; ++i)
1085       Indices[i] = i;
1086     for (unsigned i = NumElts; i != 8; ++i)
1087       Indices[i] = NumElts + i % NumElts;
1088     Vec = Builder.CreateShuffleVector(Vec,
1089                                       Constant::getNullValue(Vec->getType()),
1090                                       Indices);
1091   }
1092   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1093 }
1094 
1095 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1096                                    unsigned CC, bool Signed) {
1097   Value *Op0 = CI.getArgOperand(0);
1098   unsigned NumElts = Op0->getType()->getVectorNumElements();
1099 
1100   Value *Cmp;
1101   if (CC == 3) {
1102     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1103   } else if (CC == 7) {
1104     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1105   } else {
1106     ICmpInst::Predicate Pred;
1107     switch (CC) {
1108     default: llvm_unreachable("Unknown condition code");
1109     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1110     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1111     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1112     case 4: Pred = ICmpInst::ICMP_NE;  break;
1113     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1114     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1115     }
1116     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1117   }
1118 
1119   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1120 
1121   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1122 }
1123 
1124 // Replace a masked intrinsic with an older unmasked intrinsic.
1125 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1126                                     Intrinsic::ID IID) {
1127   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1128   Value *Rep = Builder.CreateCall(Intrin,
1129                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1130   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1131 }
1132 
1133 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1134   Value* A = CI.getArgOperand(0);
1135   Value* B = CI.getArgOperand(1);
1136   Value* Src = CI.getArgOperand(2);
1137   Value* Mask = CI.getArgOperand(3);
1138 
1139   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1140   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1141   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1142   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1143   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1144   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1145 }
1146 
1147 
1148 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1149   Value* Op = CI.getArgOperand(0);
1150   Type* ReturnOp = CI.getType();
1151   unsigned NumElts = CI.getType()->getVectorNumElements();
1152   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1153   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1154 }
1155 
1156 // Replace intrinsic with unmasked version and a select.
1157 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1158                                       CallInst &CI, Value *&Rep) {
1159   Name = Name.substr(12); // Remove avx512.mask.
1160 
1161   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1162   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1163   Intrinsic::ID IID;
1164   if (Name.startswith("max.p")) {
1165     if (VecWidth == 128 && EltWidth == 32)
1166       IID = Intrinsic::x86_sse_max_ps;
1167     else if (VecWidth == 128 && EltWidth == 64)
1168       IID = Intrinsic::x86_sse2_max_pd;
1169     else if (VecWidth == 256 && EltWidth == 32)
1170       IID = Intrinsic::x86_avx_max_ps_256;
1171     else if (VecWidth == 256 && EltWidth == 64)
1172       IID = Intrinsic::x86_avx_max_pd_256;
1173     else
1174       llvm_unreachable("Unexpected intrinsic");
1175   } else if (Name.startswith("min.p")) {
1176     if (VecWidth == 128 && EltWidth == 32)
1177       IID = Intrinsic::x86_sse_min_ps;
1178     else if (VecWidth == 128 && EltWidth == 64)
1179       IID = Intrinsic::x86_sse2_min_pd;
1180     else if (VecWidth == 256 && EltWidth == 32)
1181       IID = Intrinsic::x86_avx_min_ps_256;
1182     else if (VecWidth == 256 && EltWidth == 64)
1183       IID = Intrinsic::x86_avx_min_pd_256;
1184     else
1185       llvm_unreachable("Unexpected intrinsic");
1186   } else if (Name.startswith("pshuf.b.")) {
1187     if (VecWidth == 128)
1188       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1189     else if (VecWidth == 256)
1190       IID = Intrinsic::x86_avx2_pshuf_b;
1191     else if (VecWidth == 512)
1192       IID = Intrinsic::x86_avx512_pshuf_b_512;
1193     else
1194       llvm_unreachable("Unexpected intrinsic");
1195   } else if (Name.startswith("pmul.hr.sw.")) {
1196     if (VecWidth == 128)
1197       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1198     else if (VecWidth == 256)
1199       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1200     else if (VecWidth == 512)
1201       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1202     else
1203       llvm_unreachable("Unexpected intrinsic");
1204   } else if (Name.startswith("pmulh.w.")) {
1205     if (VecWidth == 128)
1206       IID = Intrinsic::x86_sse2_pmulh_w;
1207     else if (VecWidth == 256)
1208       IID = Intrinsic::x86_avx2_pmulh_w;
1209     else if (VecWidth == 512)
1210       IID = Intrinsic::x86_avx512_pmulh_w_512;
1211     else
1212       llvm_unreachable("Unexpected intrinsic");
1213   } else if (Name.startswith("pmulhu.w.")) {
1214     if (VecWidth == 128)
1215       IID = Intrinsic::x86_sse2_pmulhu_w;
1216     else if (VecWidth == 256)
1217       IID = Intrinsic::x86_avx2_pmulhu_w;
1218     else if (VecWidth == 512)
1219       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1220     else
1221       llvm_unreachable("Unexpected intrinsic");
1222   } else if (Name.startswith("pmaddw.d.")) {
1223     if (VecWidth == 128)
1224       IID = Intrinsic::x86_sse2_pmadd_wd;
1225     else if (VecWidth == 256)
1226       IID = Intrinsic::x86_avx2_pmadd_wd;
1227     else if (VecWidth == 512)
1228       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1229     else
1230       llvm_unreachable("Unexpected intrinsic");
1231   } else if (Name.startswith("pmaddubs.w.")) {
1232     if (VecWidth == 128)
1233       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1234     else if (VecWidth == 256)
1235       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1236     else if (VecWidth == 512)
1237       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1238     else
1239       llvm_unreachable("Unexpected intrinsic");
1240   } else if (Name.startswith("packsswb.")) {
1241     if (VecWidth == 128)
1242       IID = Intrinsic::x86_sse2_packsswb_128;
1243     else if (VecWidth == 256)
1244       IID = Intrinsic::x86_avx2_packsswb;
1245     else if (VecWidth == 512)
1246       IID = Intrinsic::x86_avx512_packsswb_512;
1247     else
1248       llvm_unreachable("Unexpected intrinsic");
1249   } else if (Name.startswith("packssdw.")) {
1250     if (VecWidth == 128)
1251       IID = Intrinsic::x86_sse2_packssdw_128;
1252     else if (VecWidth == 256)
1253       IID = Intrinsic::x86_avx2_packssdw;
1254     else if (VecWidth == 512)
1255       IID = Intrinsic::x86_avx512_packssdw_512;
1256     else
1257       llvm_unreachable("Unexpected intrinsic");
1258   } else if (Name.startswith("packuswb.")) {
1259     if (VecWidth == 128)
1260       IID = Intrinsic::x86_sse2_packuswb_128;
1261     else if (VecWidth == 256)
1262       IID = Intrinsic::x86_avx2_packuswb;
1263     else if (VecWidth == 512)
1264       IID = Intrinsic::x86_avx512_packuswb_512;
1265     else
1266       llvm_unreachable("Unexpected intrinsic");
1267   } else if (Name.startswith("packusdw.")) {
1268     if (VecWidth == 128)
1269       IID = Intrinsic::x86_sse41_packusdw;
1270     else if (VecWidth == 256)
1271       IID = Intrinsic::x86_avx2_packusdw;
1272     else if (VecWidth == 512)
1273       IID = Intrinsic::x86_avx512_packusdw_512;
1274     else
1275       llvm_unreachable("Unexpected intrinsic");
1276   } else if (Name.startswith("vpermilvar.")) {
1277     if (VecWidth == 128 && EltWidth == 32)
1278       IID = Intrinsic::x86_avx_vpermilvar_ps;
1279     else if (VecWidth == 128 && EltWidth == 64)
1280       IID = Intrinsic::x86_avx_vpermilvar_pd;
1281     else if (VecWidth == 256 && EltWidth == 32)
1282       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1283     else if (VecWidth == 256 && EltWidth == 64)
1284       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1285     else if (VecWidth == 512 && EltWidth == 32)
1286       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1287     else if (VecWidth == 512 && EltWidth == 64)
1288       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1289     else
1290       llvm_unreachable("Unexpected intrinsic");
1291   } else if (Name == "cvtpd2dq.256") {
1292     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1293   } else if (Name == "cvtpd2ps.256") {
1294     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1295   } else if (Name == "cvttpd2dq.256") {
1296     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1297   } else if (Name == "cvttps2dq.128") {
1298     IID = Intrinsic::x86_sse2_cvttps2dq;
1299   } else if (Name == "cvttps2dq.256") {
1300     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1301   } else if (Name.startswith("permvar.")) {
1302     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1303     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1304       IID = Intrinsic::x86_avx2_permps;
1305     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1306       IID = Intrinsic::x86_avx2_permd;
1307     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1308       IID = Intrinsic::x86_avx512_permvar_df_256;
1309     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1310       IID = Intrinsic::x86_avx512_permvar_di_256;
1311     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1312       IID = Intrinsic::x86_avx512_permvar_sf_512;
1313     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1314       IID = Intrinsic::x86_avx512_permvar_si_512;
1315     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1316       IID = Intrinsic::x86_avx512_permvar_df_512;
1317     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1318       IID = Intrinsic::x86_avx512_permvar_di_512;
1319     else if (VecWidth == 128 && EltWidth == 16)
1320       IID = Intrinsic::x86_avx512_permvar_hi_128;
1321     else if (VecWidth == 256 && EltWidth == 16)
1322       IID = Intrinsic::x86_avx512_permvar_hi_256;
1323     else if (VecWidth == 512 && EltWidth == 16)
1324       IID = Intrinsic::x86_avx512_permvar_hi_512;
1325     else if (VecWidth == 128 && EltWidth == 8)
1326       IID = Intrinsic::x86_avx512_permvar_qi_128;
1327     else if (VecWidth == 256 && EltWidth == 8)
1328       IID = Intrinsic::x86_avx512_permvar_qi_256;
1329     else if (VecWidth == 512 && EltWidth == 8)
1330       IID = Intrinsic::x86_avx512_permvar_qi_512;
1331     else
1332       llvm_unreachable("Unexpected intrinsic");
1333   } else if (Name.startswith("dbpsadbw.")) {
1334     if (VecWidth == 128)
1335       IID = Intrinsic::x86_avx512_dbpsadbw_128;
1336     else if (VecWidth == 256)
1337       IID = Intrinsic::x86_avx512_dbpsadbw_256;
1338     else if (VecWidth == 512)
1339       IID = Intrinsic::x86_avx512_dbpsadbw_512;
1340     else
1341       llvm_unreachable("Unexpected intrinsic");
1342   } else if (Name.startswith("vpshld.")) {
1343     if (VecWidth == 128 && Name[7] == 'q')
1344       IID = Intrinsic::x86_avx512_vpshld_q_128;
1345     else if (VecWidth == 128 && Name[7] == 'd')
1346       IID = Intrinsic::x86_avx512_vpshld_d_128;
1347     else if (VecWidth == 128 && Name[7] == 'w')
1348       IID = Intrinsic::x86_avx512_vpshld_w_128;
1349     else if (VecWidth == 256 && Name[7] == 'q')
1350       IID = Intrinsic::x86_avx512_vpshld_q_256;
1351     else if (VecWidth == 256 && Name[7] == 'd')
1352       IID = Intrinsic::x86_avx512_vpshld_d_256;
1353     else if (VecWidth == 256 && Name[7] == 'w')
1354       IID = Intrinsic::x86_avx512_vpshld_w_256;
1355     else if (VecWidth == 512 && Name[7] == 'q')
1356       IID = Intrinsic::x86_avx512_vpshld_q_512;
1357     else if (VecWidth == 512 && Name[7] == 'd')
1358       IID = Intrinsic::x86_avx512_vpshld_d_512;
1359     else if (VecWidth == 512 && Name[7] == 'w')
1360       IID = Intrinsic::x86_avx512_vpshld_w_512;
1361     else
1362       llvm_unreachable("Unexpected intrinsic");
1363   } else if (Name.startswith("vpshrd.")) {
1364     if (VecWidth == 128 && Name[7] == 'q')
1365       IID = Intrinsic::x86_avx512_vpshrd_q_128;
1366     else if (VecWidth == 128 && Name[7] == 'd')
1367       IID = Intrinsic::x86_avx512_vpshrd_d_128;
1368     else if (VecWidth == 128 && Name[7] == 'w')
1369       IID = Intrinsic::x86_avx512_vpshrd_w_128;
1370     else if (VecWidth == 256 && Name[7] == 'q')
1371       IID = Intrinsic::x86_avx512_vpshrd_q_256;
1372     else if (VecWidth == 256 && Name[7] == 'd')
1373       IID = Intrinsic::x86_avx512_vpshrd_d_256;
1374     else if (VecWidth == 256 && Name[7] == 'w')
1375       IID = Intrinsic::x86_avx512_vpshrd_w_256;
1376     else if (VecWidth == 512 && Name[7] == 'q')
1377       IID = Intrinsic::x86_avx512_vpshrd_q_512;
1378     else if (VecWidth == 512 && Name[7] == 'd')
1379       IID = Intrinsic::x86_avx512_vpshrd_d_512;
1380     else if (VecWidth == 512 && Name[7] == 'w')
1381       IID = Intrinsic::x86_avx512_vpshrd_w_512;
1382     else
1383       llvm_unreachable("Unexpected intrinsic");
1384   } else if (Name.startswith("prorv.")) {
1385     if (VecWidth == 128 && EltWidth == 32)
1386       IID = Intrinsic::x86_avx512_prorv_d_128;
1387     else if (VecWidth == 256 && EltWidth == 32)
1388       IID = Intrinsic::x86_avx512_prorv_d_256;
1389     else if (VecWidth == 512 && EltWidth == 32)
1390       IID = Intrinsic::x86_avx512_prorv_d_512;
1391     else if (VecWidth == 128 && EltWidth == 64)
1392       IID = Intrinsic::x86_avx512_prorv_q_128;
1393     else if (VecWidth == 256 && EltWidth == 64)
1394       IID = Intrinsic::x86_avx512_prorv_q_256;
1395     else if (VecWidth == 512 && EltWidth == 64)
1396       IID = Intrinsic::x86_avx512_prorv_q_512;
1397     else
1398       llvm_unreachable("Unexpected intrinsic");
1399   } else if (Name.startswith("prolv.")) {
1400     if (VecWidth == 128 && EltWidth == 32)
1401       IID = Intrinsic::x86_avx512_prolv_d_128;
1402     else if (VecWidth == 256 && EltWidth == 32)
1403       IID = Intrinsic::x86_avx512_prolv_d_256;
1404     else if (VecWidth == 512 && EltWidth == 32)
1405       IID = Intrinsic::x86_avx512_prolv_d_512;
1406     else if (VecWidth == 128 && EltWidth == 64)
1407       IID = Intrinsic::x86_avx512_prolv_q_128;
1408     else if (VecWidth == 256 && EltWidth == 64)
1409       IID = Intrinsic::x86_avx512_prolv_q_256;
1410     else if (VecWidth == 512 && EltWidth == 64)
1411       IID = Intrinsic::x86_avx512_prolv_q_512;
1412     else
1413       llvm_unreachable("Unexpected intrinsic");
1414   } else if (Name.startswith("pror.")) {
1415     if (VecWidth == 128 && EltWidth == 32)
1416       IID = Intrinsic::x86_avx512_pror_d_128;
1417     else if (VecWidth == 256 && EltWidth == 32)
1418       IID = Intrinsic::x86_avx512_pror_d_256;
1419     else if (VecWidth == 512 && EltWidth == 32)
1420       IID = Intrinsic::x86_avx512_pror_d_512;
1421     else if (VecWidth == 128 && EltWidth == 64)
1422       IID = Intrinsic::x86_avx512_pror_q_128;
1423     else if (VecWidth == 256 && EltWidth == 64)
1424       IID = Intrinsic::x86_avx512_pror_q_256;
1425     else if (VecWidth == 512 && EltWidth == 64)
1426       IID = Intrinsic::x86_avx512_pror_q_512;
1427     else
1428       llvm_unreachable("Unexpected intrinsic");
1429   } else if (Name.startswith("prol.")) {
1430     if (VecWidth == 128 && EltWidth == 32)
1431       IID = Intrinsic::x86_avx512_prol_d_128;
1432     else if (VecWidth == 256 && EltWidth == 32)
1433       IID = Intrinsic::x86_avx512_prol_d_256;
1434     else if (VecWidth == 512 && EltWidth == 32)
1435       IID = Intrinsic::x86_avx512_prol_d_512;
1436     else if (VecWidth == 128 && EltWidth == 64)
1437       IID = Intrinsic::x86_avx512_prol_q_128;
1438     else if (VecWidth == 256 && EltWidth == 64)
1439       IID = Intrinsic::x86_avx512_prol_q_256;
1440     else if (VecWidth == 512 && EltWidth == 64)
1441       IID = Intrinsic::x86_avx512_prol_q_512;
1442     else
1443       llvm_unreachable("Unexpected intrinsic");
1444   } else if (Name.startswith("padds.")) {
1445     if (VecWidth == 128 && EltWidth == 8)
1446       IID = Intrinsic::x86_sse2_padds_b;
1447     else if (VecWidth == 256 && EltWidth == 8)
1448       IID = Intrinsic::x86_avx2_padds_b;
1449     else if (VecWidth == 512 && EltWidth == 8)
1450       IID = Intrinsic::x86_avx512_padds_b_512;
1451     else if (VecWidth == 128 && EltWidth == 16)
1452       IID = Intrinsic::x86_sse2_padds_w;
1453     else if (VecWidth == 256 && EltWidth == 16)
1454       IID = Intrinsic::x86_avx2_padds_w;
1455     else if (VecWidth == 512 && EltWidth == 16)
1456       IID = Intrinsic::x86_avx512_padds_w_512;
1457     else
1458       llvm_unreachable("Unexpected intrinsic");
1459   } else if (Name.startswith("psubs.")) {
1460     if (VecWidth == 128 && EltWidth == 8)
1461       IID = Intrinsic::x86_sse2_psubs_b;
1462     else if (VecWidth == 256 && EltWidth == 8)
1463       IID = Intrinsic::x86_avx2_psubs_b;
1464     else if (VecWidth == 512 && EltWidth == 8)
1465       IID = Intrinsic::x86_avx512_psubs_b_512;
1466     else if (VecWidth == 128 && EltWidth == 16)
1467       IID = Intrinsic::x86_sse2_psubs_w;
1468     else if (VecWidth == 256 && EltWidth == 16)
1469       IID = Intrinsic::x86_avx2_psubs_w;
1470     else if (VecWidth == 512 && EltWidth == 16)
1471       IID = Intrinsic::x86_avx512_psubs_w_512;
1472     else
1473       llvm_unreachable("Unexpected intrinsic");
1474   } else
1475     return false;
1476 
1477   SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1478                                CI.arg_operands().end());
1479   Args.pop_back();
1480   Args.pop_back();
1481   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1482                            Args);
1483   unsigned NumArgs = CI.getNumArgOperands();
1484   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1485                       CI.getArgOperand(NumArgs - 2));
1486   return true;
1487 }
1488 
1489 /// Upgrade comment in call to inline asm that represents an objc retain release
1490 /// marker.
1491 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1492   size_t Pos;
1493   if (AsmStr->find("mov\tfp") == 0 &&
1494       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1495       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1496     AsmStr->replace(Pos, 1, ";");
1497   }
1498   return;
1499 }
1500 
1501 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1502 /// provided to seamlessly integrate with existing context.
1503 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1504   Function *F = CI->getCalledFunction();
1505   LLVMContext &C = CI->getContext();
1506   IRBuilder<> Builder(C);
1507   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1508 
1509   assert(F && "Intrinsic call is not direct?");
1510 
1511   if (!NewFn) {
1512     // Get the Function's name.
1513     StringRef Name = F->getName();
1514 
1515     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1516     Name = Name.substr(5);
1517 
1518     bool IsX86 = Name.startswith("x86.");
1519     if (IsX86)
1520       Name = Name.substr(4);
1521     bool IsNVVM = Name.startswith("nvvm.");
1522     if (IsNVVM)
1523       Name = Name.substr(5);
1524 
1525     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1526       Module *M = F->getParent();
1527       SmallVector<Metadata *, 1> Elts;
1528       Elts.push_back(
1529           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1530       MDNode *Node = MDNode::get(C, Elts);
1531 
1532       Value *Arg0 = CI->getArgOperand(0);
1533       Value *Arg1 = CI->getArgOperand(1);
1534 
1535       // Nontemporal (unaligned) store of the 0'th element of the float/double
1536       // vector.
1537       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1538       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1539       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1540       Value *Extract =
1541           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1542 
1543       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1544       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1545 
1546       // Remove intrinsic.
1547       CI->eraseFromParent();
1548       return;
1549     }
1550 
1551     if (IsX86 && (Name.startswith("avx.movnt.") ||
1552                   Name.startswith("avx512.storent."))) {
1553       Module *M = F->getParent();
1554       SmallVector<Metadata *, 1> Elts;
1555       Elts.push_back(
1556           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1557       MDNode *Node = MDNode::get(C, Elts);
1558 
1559       Value *Arg0 = CI->getArgOperand(0);
1560       Value *Arg1 = CI->getArgOperand(1);
1561 
1562       // Convert the type of the pointer to a pointer to the stored type.
1563       Value *BC = Builder.CreateBitCast(Arg0,
1564                                         PointerType::getUnqual(Arg1->getType()),
1565                                         "cast");
1566       VectorType *VTy = cast<VectorType>(Arg1->getType());
1567       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1568                                                  VTy->getBitWidth() / 8);
1569       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1570 
1571       // Remove intrinsic.
1572       CI->eraseFromParent();
1573       return;
1574     }
1575 
1576     if (IsX86 && Name == "sse2.storel.dq") {
1577       Value *Arg0 = CI->getArgOperand(0);
1578       Value *Arg1 = CI->getArgOperand(1);
1579 
1580       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1581       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1582       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1583       Value *BC = Builder.CreateBitCast(Arg0,
1584                                         PointerType::getUnqual(Elt->getType()),
1585                                         "cast");
1586       Builder.CreateAlignedStore(Elt, BC, 1);
1587 
1588       // Remove intrinsic.
1589       CI->eraseFromParent();
1590       return;
1591     }
1592 
1593     if (IsX86 && (Name.startswith("sse.storeu.") ||
1594                   Name.startswith("sse2.storeu.") ||
1595                   Name.startswith("avx.storeu."))) {
1596       Value *Arg0 = CI->getArgOperand(0);
1597       Value *Arg1 = CI->getArgOperand(1);
1598 
1599       Arg0 = Builder.CreateBitCast(Arg0,
1600                                    PointerType::getUnqual(Arg1->getType()),
1601                                    "cast");
1602       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1603 
1604       // Remove intrinsic.
1605       CI->eraseFromParent();
1606       return;
1607     }
1608 
1609     if (IsX86 && Name == "avx512.mask.store.ss") {
1610       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1611       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1612                          Mask, false);
1613 
1614       // Remove intrinsic.
1615       CI->eraseFromParent();
1616       return;
1617     }
1618 
1619     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1620       // "avx512.mask.storeu." or "avx512.mask.store."
1621       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1622       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1623                          CI->getArgOperand(2), Aligned);
1624 
1625       // Remove intrinsic.
1626       CI->eraseFromParent();
1627       return;
1628     }
1629 
1630     Value *Rep;
1631     // Upgrade packed integer vector compare intrinsics to compare instructions.
1632     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1633                   Name.startswith("avx2.pcmp"))) {
1634       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1635       bool CmpEq = Name[9] == 'e';
1636       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1637                                CI->getArgOperand(0), CI->getArgOperand(1));
1638       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1639     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1640       Type *ExtTy = Type::getInt32Ty(C);
1641       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1642         ExtTy = Type::getInt64Ty(C);
1643       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1644                          ExtTy->getPrimitiveSizeInBits();
1645       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1646       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1647     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1648                          Name == "sse2.sqrt.sd")) {
1649       Value *Vec = CI->getArgOperand(0);
1650       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1651       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1652                                                  Intrinsic::sqrt, Elt0->getType());
1653       Elt0 = Builder.CreateCall(Intr, Elt0);
1654       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1655     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1656                          Name.startswith("sse2.sqrt.p") ||
1657                          Name.startswith("sse.sqrt.p"))) {
1658       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1659                                                          Intrinsic::sqrt,
1660                                                          CI->getType()),
1661                                {CI->getArgOperand(0)});
1662     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1663       if (CI->getNumArgOperands() == 4 &&
1664           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1665            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1666         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1667                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1668 
1669         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1670         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1671                                                            IID), Args);
1672       } else {
1673         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1674                                                            Intrinsic::sqrt,
1675                                                            CI->getType()),
1676                                  {CI->getArgOperand(0)});
1677       }
1678       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1679                           CI->getArgOperand(1));
1680     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1681                          Name.startswith("avx512.ptestnm"))) {
1682       Value *Op0 = CI->getArgOperand(0);
1683       Value *Op1 = CI->getArgOperand(1);
1684       Value *Mask = CI->getArgOperand(2);
1685       Rep = Builder.CreateAnd(Op0, Op1);
1686       llvm::Type *Ty = Op0->getType();
1687       Value *Zero = llvm::Constant::getNullValue(Ty);
1688       ICmpInst::Predicate Pred =
1689         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1690       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1691       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1692     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1693       unsigned NumElts =
1694           CI->getArgOperand(1)->getType()->getVectorNumElements();
1695       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1696       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1697                           CI->getArgOperand(1));
1698     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1699       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1700       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1701       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1702       uint32_t Indices[64];
1703       for (unsigned i = 0; i != NumElts; ++i)
1704         Indices[i] = i;
1705 
1706       // First extract half of each vector. This gives better codegen than
1707       // doing it in a single shuffle.
1708       LHS = Builder.CreateShuffleVector(LHS, LHS,
1709                                         makeArrayRef(Indices, NumElts / 2));
1710       RHS = Builder.CreateShuffleVector(RHS, RHS,
1711                                         makeArrayRef(Indices, NumElts / 2));
1712       // Concat the vectors.
1713       // NOTE: Operands have to be swapped to match intrinsic definition.
1714       Rep = Builder.CreateShuffleVector(RHS, LHS,
1715                                         makeArrayRef(Indices, NumElts));
1716       Rep = Builder.CreateBitCast(Rep, CI->getType());
1717     } else if (IsX86 && Name == "avx512.kand.w") {
1718       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1719       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1720       Rep = Builder.CreateAnd(LHS, RHS);
1721       Rep = Builder.CreateBitCast(Rep, CI->getType());
1722     } else if (IsX86 && Name == "avx512.kandn.w") {
1723       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1724       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1725       LHS = Builder.CreateNot(LHS);
1726       Rep = Builder.CreateAnd(LHS, RHS);
1727       Rep = Builder.CreateBitCast(Rep, CI->getType());
1728     } else if (IsX86 && Name == "avx512.kor.w") {
1729       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1730       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1731       Rep = Builder.CreateOr(LHS, RHS);
1732       Rep = Builder.CreateBitCast(Rep, CI->getType());
1733     } else if (IsX86 && Name == "avx512.kxor.w") {
1734       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1735       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1736       Rep = Builder.CreateXor(LHS, RHS);
1737       Rep = Builder.CreateBitCast(Rep, CI->getType());
1738     } else if (IsX86 && Name == "avx512.kxnor.w") {
1739       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1740       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1741       LHS = Builder.CreateNot(LHS);
1742       Rep = Builder.CreateXor(LHS, RHS);
1743       Rep = Builder.CreateBitCast(Rep, CI->getType());
1744     } else if (IsX86 && Name == "avx512.knot.w") {
1745       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1746       Rep = Builder.CreateNot(Rep);
1747       Rep = Builder.CreateBitCast(Rep, CI->getType());
1748     } else if (IsX86 &&
1749                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1750       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1751       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1752       Rep = Builder.CreateOr(LHS, RHS);
1753       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1754       Value *C;
1755       if (Name[14] == 'c')
1756         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1757       else
1758         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1759       Rep = Builder.CreateICmpEQ(Rep, C);
1760       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1761     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1762                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1763                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1764                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1765       Type *I32Ty = Type::getInt32Ty(C);
1766       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1767                                                  ConstantInt::get(I32Ty, 0));
1768       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1769                                                  ConstantInt::get(I32Ty, 0));
1770       Value *EltOp;
1771       if (Name.contains(".add."))
1772         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1773       else if (Name.contains(".sub."))
1774         EltOp = Builder.CreateFSub(Elt0, Elt1);
1775       else if (Name.contains(".mul."))
1776         EltOp = Builder.CreateFMul(Elt0, Elt1);
1777       else
1778         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1779       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1780                                         ConstantInt::get(I32Ty, 0));
1781     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1782       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1783       bool CmpEq = Name[16] == 'e';
1784       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1785     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1786       Type *OpTy = CI->getArgOperand(0)->getType();
1787       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1788       unsigned EltWidth = OpTy->getScalarSizeInBits();
1789       Intrinsic::ID IID;
1790       if (VecWidth == 128 && EltWidth == 32)
1791         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1792       else if (VecWidth == 256 && EltWidth == 32)
1793         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1794       else if (VecWidth == 512 && EltWidth == 32)
1795         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1796       else if (VecWidth == 128 && EltWidth == 64)
1797         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1798       else if (VecWidth == 256 && EltWidth == 64)
1799         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1800       else if (VecWidth == 512 && EltWidth == 64)
1801         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1802       else
1803         llvm_unreachable("Unexpected intrinsic");
1804 
1805       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1806                                { CI->getOperand(0), CI->getArgOperand(1) });
1807       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1808     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1809       Type *OpTy = CI->getArgOperand(0)->getType();
1810       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1811       unsigned EltWidth = OpTy->getScalarSizeInBits();
1812       Intrinsic::ID IID;
1813       if (VecWidth == 128 && EltWidth == 32)
1814         IID = Intrinsic::x86_avx512_cmp_ps_128;
1815       else if (VecWidth == 256 && EltWidth == 32)
1816         IID = Intrinsic::x86_avx512_cmp_ps_256;
1817       else if (VecWidth == 512 && EltWidth == 32)
1818         IID = Intrinsic::x86_avx512_cmp_ps_512;
1819       else if (VecWidth == 128 && EltWidth == 64)
1820         IID = Intrinsic::x86_avx512_cmp_pd_128;
1821       else if (VecWidth == 256 && EltWidth == 64)
1822         IID = Intrinsic::x86_avx512_cmp_pd_256;
1823       else if (VecWidth == 512 && EltWidth == 64)
1824         IID = Intrinsic::x86_avx512_cmp_pd_512;
1825       else
1826         llvm_unreachable("Unexpected intrinsic");
1827 
1828       SmallVector<Value *, 4> Args;
1829       Args.push_back(CI->getArgOperand(0));
1830       Args.push_back(CI->getArgOperand(1));
1831       Args.push_back(CI->getArgOperand(2));
1832       if (CI->getNumArgOperands() == 5)
1833         Args.push_back(CI->getArgOperand(4));
1834 
1835       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1836                                Args);
1837       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1838     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1839                Name[16] != 'p') {
1840       // Integer compare intrinsics.
1841       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1842       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1843     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1844       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1845       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1846     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1847                          Name.startswith("avx512.cvtw2mask.") ||
1848                          Name.startswith("avx512.cvtd2mask.") ||
1849                          Name.startswith("avx512.cvtq2mask."))) {
1850       Value *Op = CI->getArgOperand(0);
1851       Value *Zero = llvm::Constant::getNullValue(Op->getType());
1852       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1853       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1854     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1855                         Name == "ssse3.pabs.w.128" ||
1856                         Name == "ssse3.pabs.d.128" ||
1857                         Name.startswith("avx2.pabs") ||
1858                         Name.startswith("avx512.mask.pabs"))) {
1859       Rep = upgradeAbs(Builder, *CI);
1860     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1861                          Name == "sse2.pmaxs.w" ||
1862                          Name == "sse41.pmaxsd" ||
1863                          Name.startswith("avx2.pmaxs") ||
1864                          Name.startswith("avx512.mask.pmaxs"))) {
1865       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1866     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1867                          Name == "sse41.pmaxuw" ||
1868                          Name == "sse41.pmaxud" ||
1869                          Name.startswith("avx2.pmaxu") ||
1870                          Name.startswith("avx512.mask.pmaxu"))) {
1871       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1872     } else if (IsX86 && (Name == "sse41.pminsb" ||
1873                          Name == "sse2.pmins.w" ||
1874                          Name == "sse41.pminsd" ||
1875                          Name.startswith("avx2.pmins") ||
1876                          Name.startswith("avx512.mask.pmins"))) {
1877       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1878     } else if (IsX86 && (Name == "sse2.pminu.b" ||
1879                          Name == "sse41.pminuw" ||
1880                          Name == "sse41.pminud" ||
1881                          Name.startswith("avx2.pminu") ||
1882                          Name.startswith("avx512.mask.pminu"))) {
1883       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1884     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1885                          Name == "avx2.pmulu.dq" ||
1886                          Name == "avx512.pmulu.dq.512" ||
1887                          Name.startswith("avx512.mask.pmulu.dq."))) {
1888       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1889     } else if (IsX86 && (Name == "sse41.pmuldq" ||
1890                          Name == "avx2.pmul.dq" ||
1891                          Name == "avx512.pmul.dq.512" ||
1892                          Name.startswith("avx512.mask.pmul.dq."))) {
1893       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1894     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1895                          Name == "sse2.cvtsi2sd" ||
1896                          Name == "sse.cvtsi642ss" ||
1897                          Name == "sse2.cvtsi642sd")) {
1898       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1899                                  CI->getType()->getVectorElementType());
1900       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1901     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1902       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1903                                  CI->getType()->getVectorElementType());
1904       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1905     } else if (IsX86 && Name == "sse2.cvtss2sd") {
1906       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1907       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1908       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1909     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1910                          Name == "sse2.cvtdq2ps" ||
1911                          Name == "avx.cvtdq2.pd.256" ||
1912                          Name == "avx.cvtdq2.ps.256" ||
1913                          Name.startswith("avx512.mask.cvtdq2pd.") ||
1914                          Name.startswith("avx512.mask.cvtudq2pd.") ||
1915                          Name == "avx512.mask.cvtdq2ps.128" ||
1916                          Name == "avx512.mask.cvtdq2ps.256" ||
1917                          Name == "avx512.mask.cvtudq2ps.128" ||
1918                          Name == "avx512.mask.cvtudq2ps.256" ||
1919                          Name == "avx512.mask.cvtqq2pd.128" ||
1920                          Name == "avx512.mask.cvtqq2pd.256" ||
1921                          Name == "avx512.mask.cvtuqq2pd.128" ||
1922                          Name == "avx512.mask.cvtuqq2pd.256" ||
1923                          Name == "sse2.cvtps2pd" ||
1924                          Name == "avx.cvt.ps2.pd.256" ||
1925                          Name == "avx512.mask.cvtps2pd.128" ||
1926                          Name == "avx512.mask.cvtps2pd.256")) {
1927       Type *DstTy = CI->getType();
1928       Rep = CI->getArgOperand(0);
1929 
1930       unsigned NumDstElts = DstTy->getVectorNumElements();
1931       if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1932         assert(NumDstElts == 2 && "Unexpected vector size");
1933         uint32_t ShuffleMask[2] = { 0, 1 };
1934         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1935       }
1936 
1937       bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1938       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1939       if (IsPS2PD)
1940         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1941       else if (IsUnsigned)
1942         Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1943       else
1944         Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1945 
1946       if (CI->getNumArgOperands() == 3)
1947         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1948                             CI->getArgOperand(1));
1949     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1950       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1951                               CI->getArgOperand(1), CI->getArgOperand(2),
1952                               /*Aligned*/false);
1953     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1954       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1955                               CI->getArgOperand(1),CI->getArgOperand(2),
1956                               /*Aligned*/true);
1957     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1958       Type *ResultTy = CI->getType();
1959       Type *PtrTy = ResultTy->getVectorElementType();
1960 
1961       // Cast the pointer to element type.
1962       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1963                                          llvm::PointerType::getUnqual(PtrTy));
1964 
1965       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1966                                      ResultTy->getVectorNumElements());
1967 
1968       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
1969                                                 Intrinsic::masked_expandload,
1970                                                 ResultTy);
1971       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1972     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1973       Type *ResultTy = CI->getArgOperand(1)->getType();
1974       Type *PtrTy = ResultTy->getVectorElementType();
1975 
1976       // Cast the pointer to element type.
1977       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1978                                          llvm::PointerType::getUnqual(PtrTy));
1979 
1980       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1981                                      ResultTy->getVectorNumElements());
1982 
1983       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
1984                                                 Intrinsic::masked_compressstore,
1985                                                 ResultTy);
1986       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1987     } else if (IsX86 && Name.startswith("xop.vpcom")) {
1988       Intrinsic::ID intID;
1989       if (Name.endswith("ub"))
1990         intID = Intrinsic::x86_xop_vpcomub;
1991       else if (Name.endswith("uw"))
1992         intID = Intrinsic::x86_xop_vpcomuw;
1993       else if (Name.endswith("ud"))
1994         intID = Intrinsic::x86_xop_vpcomud;
1995       else if (Name.endswith("uq"))
1996         intID = Intrinsic::x86_xop_vpcomuq;
1997       else if (Name.endswith("b"))
1998         intID = Intrinsic::x86_xop_vpcomb;
1999       else if (Name.endswith("w"))
2000         intID = Intrinsic::x86_xop_vpcomw;
2001       else if (Name.endswith("d"))
2002         intID = Intrinsic::x86_xop_vpcomd;
2003       else if (Name.endswith("q"))
2004         intID = Intrinsic::x86_xop_vpcomq;
2005       else
2006         llvm_unreachable("Unknown suffix");
2007 
2008       Name = Name.substr(9); // strip off "xop.vpcom"
2009       unsigned Imm;
2010       if (Name.startswith("lt"))
2011         Imm = 0;
2012       else if (Name.startswith("le"))
2013         Imm = 1;
2014       else if (Name.startswith("gt"))
2015         Imm = 2;
2016       else if (Name.startswith("ge"))
2017         Imm = 3;
2018       else if (Name.startswith("eq"))
2019         Imm = 4;
2020       else if (Name.startswith("ne"))
2021         Imm = 5;
2022       else if (Name.startswith("false"))
2023         Imm = 6;
2024       else if (Name.startswith("true"))
2025         Imm = 7;
2026       else
2027         llvm_unreachable("Unknown condition");
2028 
2029       Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
2030       Rep =
2031           Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
2032                                      Builder.getInt8(Imm)});
2033     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2034       Value *Sel = CI->getArgOperand(2);
2035       Value *NotSel = Builder.CreateNot(Sel);
2036       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2037       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2038       Rep = Builder.CreateOr(Sel0, Sel1);
2039     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2040       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2041                                                Intrinsic::x86_sse42_crc32_32_8);
2042       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2043       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2044       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2045     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2046                          Name.startswith("avx512.vbroadcast.s"))) {
2047       // Replace broadcasts with a series of insertelements.
2048       Type *VecTy = CI->getType();
2049       Type *EltTy = VecTy->getVectorElementType();
2050       unsigned EltNum = VecTy->getVectorNumElements();
2051       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2052                                           EltTy->getPointerTo());
2053       Value *Load = Builder.CreateLoad(EltTy, Cast);
2054       Type *I32Ty = Type::getInt32Ty(C);
2055       Rep = UndefValue::get(VecTy);
2056       for (unsigned I = 0; I < EltNum; ++I)
2057         Rep = Builder.CreateInsertElement(Rep, Load,
2058                                           ConstantInt::get(I32Ty, I));
2059     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2060                          Name.startswith("sse41.pmovzx") ||
2061                          Name.startswith("avx2.pmovsx") ||
2062                          Name.startswith("avx2.pmovzx") ||
2063                          Name.startswith("avx512.mask.pmovsx") ||
2064                          Name.startswith("avx512.mask.pmovzx"))) {
2065       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2066       VectorType *DstTy = cast<VectorType>(CI->getType());
2067       unsigned NumDstElts = DstTy->getNumElements();
2068 
2069       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2070       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2071       for (unsigned i = 0; i != NumDstElts; ++i)
2072         ShuffleMask[i] = i;
2073 
2074       Value *SV = Builder.CreateShuffleVector(
2075           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2076 
2077       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2078       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2079                    : Builder.CreateZExt(SV, DstTy);
2080       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2081       if (CI->getNumArgOperands() == 3)
2082         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2083                             CI->getArgOperand(1));
2084     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2085                          Name == "avx2.vbroadcasti128")) {
2086       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2087       Type *EltTy = CI->getType()->getVectorElementType();
2088       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2089       Type *VT = VectorType::get(EltTy, NumSrcElts);
2090       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2091                                             PointerType::getUnqual(VT));
2092       Value *Load = Builder.CreateAlignedLoad(Op, 1);
2093       if (NumSrcElts == 2)
2094         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2095                                           { 0, 1, 0, 1 });
2096       else
2097         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2098                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2099     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2100                          Name.startswith("avx512.mask.shuf.f"))) {
2101       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2102       Type *VT = CI->getType();
2103       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2104       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2105       unsigned ControlBitsMask = NumLanes - 1;
2106       unsigned NumControlBits = NumLanes / 2;
2107       SmallVector<uint32_t, 8> ShuffleMask(0);
2108 
2109       for (unsigned l = 0; l != NumLanes; ++l) {
2110         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2111         // We actually need the other source.
2112         if (l >= NumLanes / 2)
2113           LaneMask += NumLanes;
2114         for (unsigned i = 0; i != NumElementsInLane; ++i)
2115           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2116       }
2117       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2118                                         CI->getArgOperand(1), ShuffleMask);
2119       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2120                           CI->getArgOperand(3));
2121     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2122                          Name.startswith("avx512.mask.broadcasti"))) {
2123       unsigned NumSrcElts =
2124                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2125       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2126 
2127       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2128       for (unsigned i = 0; i != NumDstElts; ++i)
2129         ShuffleMask[i] = i % NumSrcElts;
2130 
2131       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2132                                         CI->getArgOperand(0),
2133                                         ShuffleMask);
2134       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2135                           CI->getArgOperand(1));
2136     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2137                          Name.startswith("avx2.vbroadcast") ||
2138                          Name.startswith("avx512.pbroadcast") ||
2139                          Name.startswith("avx512.mask.broadcast.s"))) {
2140       // Replace vp?broadcasts with a vector shuffle.
2141       Value *Op = CI->getArgOperand(0);
2142       unsigned NumElts = CI->getType()->getVectorNumElements();
2143       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2144       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2145                                         Constant::getNullValue(MaskTy));
2146 
2147       if (CI->getNumArgOperands() == 3)
2148         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2149                             CI->getArgOperand(1));
2150     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2151                          Name.startswith("sse2.psubus.") ||
2152                          Name.startswith("avx2.paddus.") ||
2153                          Name.startswith("avx2.psubus.") ||
2154                          Name.startswith("avx512.mask.paddus.") ||
2155                          Name.startswith("avx512.mask.psubus."))) {
2156       bool IsAdd = Name.contains(".paddus");
2157       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd);
2158     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2159       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2160                                       CI->getArgOperand(1),
2161                                       CI->getArgOperand(2),
2162                                       CI->getArgOperand(3),
2163                                       CI->getArgOperand(4),
2164                                       false);
2165     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2166       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2167                                       CI->getArgOperand(1),
2168                                       CI->getArgOperand(2),
2169                                       CI->getArgOperand(3),
2170                                       CI->getArgOperand(4),
2171                                       true);
2172     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2173                          Name == "avx2.psll.dq")) {
2174       // 128/256-bit shift left specified in bits.
2175       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2176       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2177                                        Shift / 8); // Shift is in bits.
2178     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2179                          Name == "avx2.psrl.dq")) {
2180       // 128/256-bit shift right specified in bits.
2181       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2182       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2183                                        Shift / 8); // Shift is in bits.
2184     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2185                          Name == "avx2.psll.dq.bs" ||
2186                          Name == "avx512.psll.dq.512")) {
2187       // 128/256/512-bit shift left specified in bytes.
2188       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2189       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2190     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2191                          Name == "avx2.psrl.dq.bs" ||
2192                          Name == "avx512.psrl.dq.512")) {
2193       // 128/256/512-bit shift right specified in bytes.
2194       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2195       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2196     } else if (IsX86 && (Name == "sse41.pblendw" ||
2197                          Name.startswith("sse41.blendp") ||
2198                          Name.startswith("avx.blend.p") ||
2199                          Name == "avx2.pblendw" ||
2200                          Name.startswith("avx2.pblendd."))) {
2201       Value *Op0 = CI->getArgOperand(0);
2202       Value *Op1 = CI->getArgOperand(1);
2203       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2204       VectorType *VecTy = cast<VectorType>(CI->getType());
2205       unsigned NumElts = VecTy->getNumElements();
2206 
2207       SmallVector<uint32_t, 16> Idxs(NumElts);
2208       for (unsigned i = 0; i != NumElts; ++i)
2209         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2210 
2211       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2212     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2213                          Name == "avx2.vinserti128" ||
2214                          Name.startswith("avx512.mask.insert"))) {
2215       Value *Op0 = CI->getArgOperand(0);
2216       Value *Op1 = CI->getArgOperand(1);
2217       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2218       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2219       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2220       unsigned Scale = DstNumElts / SrcNumElts;
2221 
2222       // Mask off the high bits of the immediate value; hardware ignores those.
2223       Imm = Imm % Scale;
2224 
2225       // Extend the second operand into a vector the size of the destination.
2226       Value *UndefV = UndefValue::get(Op1->getType());
2227       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2228       for (unsigned i = 0; i != SrcNumElts; ++i)
2229         Idxs[i] = i;
2230       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2231         Idxs[i] = SrcNumElts;
2232       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2233 
2234       // Insert the second operand into the first operand.
2235 
2236       // Note that there is no guarantee that instruction lowering will actually
2237       // produce a vinsertf128 instruction for the created shuffles. In
2238       // particular, the 0 immediate case involves no lane changes, so it can
2239       // be handled as a blend.
2240 
2241       // Example of shuffle mask for 32-bit elements:
2242       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2243       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2244 
      // First fill with the identity mask.
2246       for (unsigned i = 0; i != DstNumElts; ++i)
2247         Idxs[i] = i;
2248       // Then replace the elements where we need to insert.
2249       for (unsigned i = 0; i != SrcNumElts; ++i)
2250         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2251       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2252 
2253       // If the intrinsic has a mask operand, handle that.
2254       if (CI->getNumArgOperands() == 5)
2255         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2256                             CI->getArgOperand(3));
2257     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2258                          Name == "avx2.vextracti128" ||
2259                          Name.startswith("avx512.mask.vextract"))) {
2260       Value *Op0 = CI->getArgOperand(0);
2261       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2262       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2263       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2264       unsigned Scale = SrcNumElts / DstNumElts;
2265 
2266       // Mask off the high bits of the immediate value; hardware ignores those.
2267       Imm = Imm % Scale;
2268 
2269       // Get indexes for the subvector of the input vector.
2270       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2271       for (unsigned i = 0; i != DstNumElts; ++i) {
2272         Idxs[i] = i + (Imm * DstNumElts);
2273       }
2274       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2275 
2276       // If the intrinsic has a mask operand, handle that.
2277       if (CI->getNumArgOperands() == 4)
2278         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2279                             CI->getArgOperand(2));
2280     } else if (!IsX86 && Name == "stackprotectorcheck") {
2281       Rep = nullptr;
2282     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2283                          Name.startswith("avx512.mask.perm.di."))) {
2284       Value *Op0 = CI->getArgOperand(0);
2285       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2286       VectorType *VecTy = cast<VectorType>(CI->getType());
2287       unsigned NumElts = VecTy->getNumElements();
2288 
2289       SmallVector<uint32_t, 8> Idxs(NumElts);
2290       for (unsigned i = 0; i != NumElts; ++i)
2291         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2292 
2293       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2294 
2295       if (CI->getNumArgOperands() == 4)
2296         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2297                             CI->getArgOperand(2));
2298     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2299                          Name == "avx2.vperm2i128")) {
2300       // The immediate permute control byte looks like this:
2301       //    [1:0] - select 128 bits from sources for low half of destination
2302       //    [2]   - ignore
2303       //    [3]   - zero low half of destination
2304       //    [5:4] - select 128 bits from sources for high half of destination
2305       //    [6]   - ignore
2306       //    [7]   - zero high half of destination
2307 
2308       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2309 
2310       unsigned NumElts = CI->getType()->getVectorNumElements();
2311       unsigned HalfSize = NumElts / 2;
2312       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2313 
2314       // Determine which operand(s) are actually in use for this instruction.
2315       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2316       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2317 
2318       // If needed, replace operands based on zero mask.
2319       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2320       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2321 
2322       // Permute low half of result.
2323       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2324       for (unsigned i = 0; i < HalfSize; ++i)
2325         ShuffleMask[i] = StartIndex + i;
2326 
2327       // Permute high half of result.
2328       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2329       for (unsigned i = 0; i < HalfSize; ++i)
2330         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2331 
2332       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2333 
2334     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2335                          Name == "sse2.pshuf.d" ||
2336                          Name.startswith("avx512.mask.vpermil.p") ||
2337                          Name.startswith("avx512.mask.pshuf.d."))) {
2338       Value *Op0 = CI->getArgOperand(0);
2339       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2340       VectorType *VecTy = cast<VectorType>(CI->getType());
2341       unsigned NumElts = VecTy->getNumElements();
2342       // Calculate the size of each index in the immediate.
2343       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2344       unsigned IdxMask = ((1 << IdxSize) - 1);
2345 
2346       SmallVector<uint32_t, 8> Idxs(NumElts);
      // Look up the bits for this element, wrapping around the immediate every
      // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
      // to offset by the first index of each group.
2350       for (unsigned i = 0; i != NumElts; ++i)
2351         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2352 
2353       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2354 
2355       if (CI->getNumArgOperands() == 4)
2356         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2357                             CI->getArgOperand(2));
2358     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2359                          Name.startswith("avx512.mask.pshufl.w."))) {
2360       Value *Op0 = CI->getArgOperand(0);
2361       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2362       unsigned NumElts = CI->getType()->getVectorNumElements();
2363 
2364       SmallVector<uint32_t, 16> Idxs(NumElts);
2365       for (unsigned l = 0; l != NumElts; l += 8) {
2366         for (unsigned i = 0; i != 4; ++i)
2367           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2368         for (unsigned i = 4; i != 8; ++i)
2369           Idxs[i + l] = i + l;
2370       }
2371 
2372       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2373 
2374       if (CI->getNumArgOperands() == 4)
2375         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2376                             CI->getArgOperand(2));
2377     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2378                          Name.startswith("avx512.mask.pshufh.w."))) {
2379       Value *Op0 = CI->getArgOperand(0);
2380       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2381       unsigned NumElts = CI->getType()->getVectorNumElements();
2382 
2383       SmallVector<uint32_t, 16> Idxs(NumElts);
2384       for (unsigned l = 0; l != NumElts; l += 8) {
2385         for (unsigned i = 0; i != 4; ++i)
2386           Idxs[i + l] = i + l;
2387         for (unsigned i = 0; i != 4; ++i)
2388           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2389       }
2390 
2391       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2392 
2393       if (CI->getNumArgOperands() == 4)
2394         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2395                             CI->getArgOperand(2));
2396     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2397       Value *Op0 = CI->getArgOperand(0);
2398       Value *Op1 = CI->getArgOperand(1);
2399       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2400       unsigned NumElts = CI->getType()->getVectorNumElements();
2401 
2402       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2403       unsigned HalfLaneElts = NumLaneElts / 2;
2404 
2405       SmallVector<uint32_t, 16> Idxs(NumElts);
2406       for (unsigned i = 0; i != NumElts; ++i) {
2407         // Base index is the starting element of the lane.
2408         Idxs[i] = i - (i % NumLaneElts);
2409         // If we are half way through the lane switch to the other source.
2410         if ((i % NumLaneElts) >= HalfLaneElts)
2411           Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
2414         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2415       }
2416 
2417       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2418 
2419       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2420                           CI->getArgOperand(3));
2421     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2422                          Name.startswith("avx512.mask.movshdup") ||
2423                          Name.startswith("avx512.mask.movsldup"))) {
2424       Value *Op0 = CI->getArgOperand(0);
2425       unsigned NumElts = CI->getType()->getVectorNumElements();
2426       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2427 
2428       unsigned Offset = 0;
2429       if (Name.startswith("avx512.mask.movshdup."))
2430         Offset = 1;
2431 
2432       SmallVector<uint32_t, 16> Idxs(NumElts);
2433       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2434         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2435           Idxs[i + l + 0] = i + l + Offset;
2436           Idxs[i + l + 1] = i + l + Offset;
2437         }
2438 
2439       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2440 
2441       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2442                           CI->getArgOperand(1));
2443     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2444                          Name.startswith("avx512.mask.unpckl."))) {
2445       Value *Op0 = CI->getArgOperand(0);
2446       Value *Op1 = CI->getArgOperand(1);
2447       int NumElts = CI->getType()->getVectorNumElements();
2448       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2449 
2450       SmallVector<uint32_t, 64> Idxs(NumElts);
2451       for (int l = 0; l != NumElts; l += NumLaneElts)
2452         for (int i = 0; i != NumLaneElts; ++i)
2453           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2454 
2455       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2456 
2457       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2458                           CI->getArgOperand(2));
2459     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2460                          Name.startswith("avx512.mask.unpckh."))) {
2461       Value *Op0 = CI->getArgOperand(0);
2462       Value *Op1 = CI->getArgOperand(1);
2463       int NumElts = CI->getType()->getVectorNumElements();
2464       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2465 
2466       SmallVector<uint32_t, 64> Idxs(NumElts);
2467       for (int l = 0; l != NumElts; l += NumLaneElts)
2468         for (int i = 0; i != NumLaneElts; ++i)
2469           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2470 
2471       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2472 
2473       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2474                           CI->getArgOperand(2));
2475     } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
2476       Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
2477       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2478                           CI->getArgOperand(2));
2479     } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
2480       Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
2481                               CI->getArgOperand(1));
2482       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2483                           CI->getArgOperand(2));
2484     } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
2485       Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
2486       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2487                           CI->getArgOperand(2));
2488     } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
2489       Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
2490       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2491                           CI->getArgOperand(2));
2492     } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
2493       VectorType *FTy = cast<VectorType>(CI->getType());
2494       VectorType *ITy = VectorType::getInteger(FTy);
2495       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2496                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2497       Rep = Builder.CreateBitCast(Rep, FTy);
2498       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2499                           CI->getArgOperand(2));
2500     } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
2501       VectorType *FTy = cast<VectorType>(CI->getType());
2502       VectorType *ITy = VectorType::getInteger(FTy);
2503       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2504       Rep = Builder.CreateAnd(Rep,
2505                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2506       Rep = Builder.CreateBitCast(Rep, FTy);
2507       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2508                           CI->getArgOperand(2));
2509     } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
2510       VectorType *FTy = cast<VectorType>(CI->getType());
2511       VectorType *ITy = VectorType::getInteger(FTy);
2512       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2513                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2514       Rep = Builder.CreateBitCast(Rep, FTy);
2515       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2516                           CI->getArgOperand(2));
2517     } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
2518       VectorType *FTy = cast<VectorType>(CI->getType());
2519       VectorType *ITy = VectorType::getInteger(FTy);
2520       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2521                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2522       Rep = Builder.CreateBitCast(Rep, FTy);
2523       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2524                           CI->getArgOperand(2));
2525     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2526       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2527       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2528                           CI->getArgOperand(2));
2529     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2530       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2531       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2532                           CI->getArgOperand(2));
2533     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2534       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2535       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2536                           CI->getArgOperand(2));
2537     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2538       if (Name.endswith(".512")) {
2539         Intrinsic::ID IID;
2540         if (Name[17] == 's')
2541           IID = Intrinsic::x86_avx512_add_ps_512;
2542         else
2543           IID = Intrinsic::x86_avx512_add_pd_512;
2544 
2545         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2546                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2547                                    CI->getArgOperand(4) });
2548       } else {
2549         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2550       }
2551       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2552                           CI->getArgOperand(2));
2553     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2554       if (Name.endswith(".512")) {
2555         Intrinsic::ID IID;
2556         if (Name[17] == 's')
2557           IID = Intrinsic::x86_avx512_div_ps_512;
2558         else
2559           IID = Intrinsic::x86_avx512_div_pd_512;
2560 
2561         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2562                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2563                                    CI->getArgOperand(4) });
2564       } else {
2565         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2566       }
2567       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2568                           CI->getArgOperand(2));
2569     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2570       if (Name.endswith(".512")) {
2571         Intrinsic::ID IID;
2572         if (Name[17] == 's')
2573           IID = Intrinsic::x86_avx512_mul_ps_512;
2574         else
2575           IID = Intrinsic::x86_avx512_mul_pd_512;
2576 
2577         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2578                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2579                                    CI->getArgOperand(4) });
2580       } else {
2581         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2582       }
2583       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2584                           CI->getArgOperand(2));
2585     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2586       if (Name.endswith(".512")) {
2587         Intrinsic::ID IID;
2588         if (Name[17] == 's')
2589           IID = Intrinsic::x86_avx512_sub_ps_512;
2590         else
2591           IID = Intrinsic::x86_avx512_sub_pd_512;
2592 
2593         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2594                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2595                                    CI->getArgOperand(4) });
2596       } else {
2597         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2598       }
2599       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2600                           CI->getArgOperand(2));
2601     } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
2602                Name.drop_front(18) == ".512") {
2603       Intrinsic::ID IID;
2604       if (Name[17] == 's')
2605         IID = Intrinsic::x86_avx512_max_ps_512;
2606       else
2607         IID = Intrinsic::x86_avx512_max_pd_512;
2608 
2609       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2610                                { CI->getArgOperand(0), CI->getArgOperand(1),
2611                                  CI->getArgOperand(4) });
2612       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2613                           CI->getArgOperand(2));
2614     } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
2615                Name.drop_front(18) == ".512") {
2616       Intrinsic::ID IID;
2617       if (Name[17] == 's')
2618         IID = Intrinsic::x86_avx512_min_ps_512;
2619       else
2620         IID = Intrinsic::x86_avx512_min_pd_512;
2621 
2622       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2623                                { CI->getArgOperand(0), CI->getArgOperand(1),
2624                                  CI->getArgOperand(4) });
2625       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2626                           CI->getArgOperand(2));
2627     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2628       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2629                                                          Intrinsic::ctlz,
2630                                                          CI->getType()),
2631                                { CI->getArgOperand(0), Builder.getInt1(false) });
2632       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2633                           CI->getArgOperand(1));
2634     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2635       bool IsImmediate = Name[16] == 'i' ||
2636                          (Name.size() > 18 && Name[18] == 'i');
2637       bool IsVariable = Name[16] == 'v';
2638       char Size = Name[16] == '.' ? Name[17] :
2639                   Name[17] == '.' ? Name[18] :
2640                   Name[18] == '.' ? Name[19] :
2641                                     Name[20];
2642 
2643       Intrinsic::ID IID;
2644       if (IsVariable && Name[17] != '.') {
2645         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2646           IID = Intrinsic::x86_avx2_psllv_q;
2647         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2648           IID = Intrinsic::x86_avx2_psllv_q_256;
2649         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2650           IID = Intrinsic::x86_avx2_psllv_d;
2651         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2652           IID = Intrinsic::x86_avx2_psllv_d_256;
2653         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2654           IID = Intrinsic::x86_avx512_psllv_w_128;
2655         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2656           IID = Intrinsic::x86_avx512_psllv_w_256;
2657         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2658           IID = Intrinsic::x86_avx512_psllv_w_512;
2659         else
2660           llvm_unreachable("Unexpected size");
2661       } else if (Name.endswith(".128")) {
2662         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2663           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2664                             : Intrinsic::x86_sse2_psll_d;
2665         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2666           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2667                             : Intrinsic::x86_sse2_psll_q;
2668         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2669           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2670                             : Intrinsic::x86_sse2_psll_w;
2671         else
2672           llvm_unreachable("Unexpected size");
2673       } else if (Name.endswith(".256")) {
2674         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2675           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2676                             : Intrinsic::x86_avx2_psll_d;
2677         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2678           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2679                             : Intrinsic::x86_avx2_psll_q;
2680         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2681           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2682                             : Intrinsic::x86_avx2_psll_w;
2683         else
2684           llvm_unreachable("Unexpected size");
2685       } else {
2686         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2687           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2688                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
2689                               Intrinsic::x86_avx512_psll_d_512;
2690         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2691           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2692                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
2693                               Intrinsic::x86_avx512_psll_q_512;
2694         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2695           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2696                             : Intrinsic::x86_avx512_psll_w_512;
2697         else
2698           llvm_unreachable("Unexpected size");
2699       }
2700 
2701       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2702     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2703       bool IsImmediate = Name[16] == 'i' ||
2704                          (Name.size() > 18 && Name[18] == 'i');
2705       bool IsVariable = Name[16] == 'v';
2706       char Size = Name[16] == '.' ? Name[17] :
2707                   Name[17] == '.' ? Name[18] :
2708                   Name[18] == '.' ? Name[19] :
2709                                     Name[20];
2710 
2711       Intrinsic::ID IID;
2712       if (IsVariable && Name[17] != '.') {
2713         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2714           IID = Intrinsic::x86_avx2_psrlv_q;
2715         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2716           IID = Intrinsic::x86_avx2_psrlv_q_256;
2717         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2718           IID = Intrinsic::x86_avx2_psrlv_d;
2719         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2720           IID = Intrinsic::x86_avx2_psrlv_d_256;
2721         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2722           IID = Intrinsic::x86_avx512_psrlv_w_128;
2723         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2724           IID = Intrinsic::x86_avx512_psrlv_w_256;
2725         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2726           IID = Intrinsic::x86_avx512_psrlv_w_512;
2727         else
2728           llvm_unreachable("Unexpected size");
2729       } else if (Name.endswith(".128")) {
2730         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2731           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2732                             : Intrinsic::x86_sse2_psrl_d;
2733         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2734           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2735                             : Intrinsic::x86_sse2_psrl_q;
2736         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2737           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2738                             : Intrinsic::x86_sse2_psrl_w;
2739         else
2740           llvm_unreachable("Unexpected size");
2741       } else if (Name.endswith(".256")) {
2742         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2743           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2744                             : Intrinsic::x86_avx2_psrl_d;
2745         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2746           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2747                             : Intrinsic::x86_avx2_psrl_q;
2748         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2749           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2750                             : Intrinsic::x86_avx2_psrl_w;
2751         else
2752           llvm_unreachable("Unexpected size");
2753       } else {
2754         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2755           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2756                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
2757                               Intrinsic::x86_avx512_psrl_d_512;
2758         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2759           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2760                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
2761                               Intrinsic::x86_avx512_psrl_q_512;
2762         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2763           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2764                             : Intrinsic::x86_avx512_psrl_w_512;
2765         else
2766           llvm_unreachable("Unexpected size");
2767       }
2768 
2769       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2770     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2771       bool IsImmediate = Name[16] == 'i' ||
2772                          (Name.size() > 18 && Name[18] == 'i');
2773       bool IsVariable = Name[16] == 'v';
2774       char Size = Name[16] == '.' ? Name[17] :
2775                   Name[17] == '.' ? Name[18] :
2776                   Name[18] == '.' ? Name[19] :
2777                                     Name[20];
2778 
2779       Intrinsic::ID IID;
2780       if (IsVariable && Name[17] != '.') {
2781         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2782           IID = Intrinsic::x86_avx2_psrav_d;
2783         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2784           IID = Intrinsic::x86_avx2_psrav_d_256;
2785         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2786           IID = Intrinsic::x86_avx512_psrav_w_128;
2787         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2788           IID = Intrinsic::x86_avx512_psrav_w_256;
2789         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2790           IID = Intrinsic::x86_avx512_psrav_w_512;
2791         else
2792           llvm_unreachable("Unexpected size");
2793       } else if (Name.endswith(".128")) {
2794         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2795           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2796                             : Intrinsic::x86_sse2_psra_d;
2797         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2798           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2799                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
2800                               Intrinsic::x86_avx512_psra_q_128;
2801         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2802           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2803                             : Intrinsic::x86_sse2_psra_w;
2804         else
2805           llvm_unreachable("Unexpected size");
2806       } else if (Name.endswith(".256")) {
2807         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2808           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2809                             : Intrinsic::x86_avx2_psra_d;
2810         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2811           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2812                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
2813                               Intrinsic::x86_avx512_psra_q_256;
2814         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2815           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2816                             : Intrinsic::x86_avx2_psra_w;
2817         else
2818           llvm_unreachable("Unexpected size");
2819       } else {
2820         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2821           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2822                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
2823                               Intrinsic::x86_avx512_psra_d_512;
2824         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2825           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2826                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
2827                               Intrinsic::x86_avx512_psra_q_512;
2828         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2829           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2830                             : Intrinsic::x86_avx512_psra_w_512;
2831         else
2832           llvm_unreachable("Unexpected size");
2833       }
2834 
2835       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2836     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2837       Rep = upgradeMaskedMove(Builder, *CI);
2838     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2839       Rep = UpgradeMaskToInt(Builder, *CI);
2840     } else if (IsX86 && Name.endswith(".movntdqa")) {
2841       Module *M = F->getParent();
2842       MDNode *Node = MDNode::get(
2843           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2844 
2845       Value *Ptr = CI->getArgOperand(0);
2846       VectorType *VTy = cast<VectorType>(CI->getType());
2847 
2848       // Convert the type of the pointer to a pointer to the stored type.
2849       Value *BC =
2850           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2851       LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2852       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2853       Rep = LI;
2854     } else if (IsX86 &&
2855                (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2856                 Name.startswith("avx512.mask.pavg"))) {
2857       // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2858       // llvm.x86.avx512.mask.pavg.b/w
2859       Value *A = CI->getArgOperand(0);
2860       Value *B = CI->getArgOperand(1);
2861       VectorType *ZextType = VectorType::getExtendedElementVectorType(
2862           cast<VectorType>(A->getType()));
2863       Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2864       Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2865       Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2866       Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2867       Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2868       Rep = Builder.CreateTrunc(ShiftR, A->getType());
2869       if (CI->getNumArgOperands() > 2) {
2870         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2871                             CI->getArgOperand(2));
2872       }
2873     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2874                          Name.startswith("fma.vfmsub.") ||
2875                          Name.startswith("fma.vfnmadd.") ||
2876                          Name.startswith("fma.vfnmsub."))) {
2877       bool NegMul = Name[6] == 'n';
2878       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2879       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
2880 
2881       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2882                        CI->getArgOperand(2) };
2883 
2884       if (IsScalar) {
2885         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2886         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2887         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2888       }
2889 
2890       if (NegMul && !IsScalar)
2891         Ops[0] = Builder.CreateFNeg(Ops[0]);
2892       if (NegMul && IsScalar)
2893         Ops[1] = Builder.CreateFNeg(Ops[1]);
2894       if (NegAcc)
2895         Ops[2] = Builder.CreateFNeg(Ops[2]);
2896 
2897       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2898                                                          Intrinsic::fma,
2899                                                          Ops[0]->getType()),
2900                                Ops);
2901 
2902       if (IsScalar)
2903         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2904                                           (uint64_t)0);
2905     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2906       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2907                        CI->getArgOperand(2) };
2908 
2909       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2910       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2911       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2912 
2913       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2914                                                          Intrinsic::fma,
2915                                                          Ops[0]->getType()),
2916                                Ops);
2917 
2918       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
2919                                         Rep, (uint64_t)0);
2920     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2921                          Name.startswith("avx512.maskz.vfmadd.s") ||
2922                          Name.startswith("avx512.mask3.vfmadd.s") ||
2923                          Name.startswith("avx512.mask3.vfmsub.s") ||
2924                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
2925       bool IsMask3 = Name[11] == '3';
2926       bool IsMaskZ = Name[11] == 'z';
2927       // Drop the "avx512.mask." to make it easier.
2928       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2929       bool NegMul = Name[2] == 'n';
2930       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2931 
2932       Value *A = CI->getArgOperand(0);
2933       Value *B = CI->getArgOperand(1);
2934       Value *C = CI->getArgOperand(2);
2935 
2936       if (NegMul && (IsMask3 || IsMaskZ))
2937         A = Builder.CreateFNeg(A);
2938       if (NegMul && !(IsMask3 || IsMaskZ))
2939         B = Builder.CreateFNeg(B);
2940       if (NegAcc)
2941         C = Builder.CreateFNeg(C);
2942 
2943       A = Builder.CreateExtractElement(A, (uint64_t)0);
2944       B = Builder.CreateExtractElement(B, (uint64_t)0);
2945       C = Builder.CreateExtractElement(C, (uint64_t)0);
2946 
2947       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2948           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2949         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
2950 
2951         Intrinsic::ID IID;
2952         if (Name.back() == 'd')
2953           IID = Intrinsic::x86_avx512_vfmadd_f64;
2954         else
2955           IID = Intrinsic::x86_avx512_vfmadd_f32;
2956         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
2957         Rep = Builder.CreateCall(FMA, Ops);
2958       } else {
2959         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
2960                                                   Intrinsic::fma,
2961                                                   A->getType());
2962         Rep = Builder.CreateCall(FMA, { A, B, C });
2963       }
2964 
2965       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
2966                         IsMask3 ? C : A;
2967 
2968       // For Mask3 with NegAcc, we need to create a new extractelement that
2969       // avoids the negation above.
2970       if (NegAcc && IsMask3)
2971         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
2972                                                 (uint64_t)0);
2973 
2974       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
2975                                 Rep, PassThru);
2976       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
2977                                         Rep, (uint64_t)0);
2978     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2979                          Name.startswith("avx512.mask.vfnmadd.p") ||
2980                          Name.startswith("avx512.mask.vfnmsub.p") ||
2981                          Name.startswith("avx512.mask3.vfmadd.p") ||
2982                          Name.startswith("avx512.mask3.vfmsub.p") ||
2983                          Name.startswith("avx512.mask3.vfnmsub.p") ||
2984                          Name.startswith("avx512.maskz.vfmadd.p"))) {
2985       bool IsMask3 = Name[11] == '3';
2986       bool IsMaskZ = Name[11] == 'z';
2987       // Drop the "avx512.mask." to make it easier.
2988       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2989       bool NegMul = Name[2] == 'n';
2990       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2991 
2992       Value *A = CI->getArgOperand(0);
2993       Value *B = CI->getArgOperand(1);
2994       Value *C = CI->getArgOperand(2);
2995 
2996       if (NegMul && (IsMask3 || IsMaskZ))
2997         A = Builder.CreateFNeg(A);
2998       if (NegMul && !(IsMask3 || IsMaskZ))
2999         B = Builder.CreateFNeg(B);
3000       if (NegAcc)
3001         C = Builder.CreateFNeg(C);
3002 
3003       if (CI->getNumArgOperands() == 5 &&
3004           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3005            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3006         Intrinsic::ID IID;
3007         // Check the character before ".512" in string.
3008         if (Name[Name.size()-5] == 's')
3009           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3010         else
3011           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3012 
3013         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3014                                  { A, B, C, CI->getArgOperand(4) });
3015       } else {
3016         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3017                                                   Intrinsic::fma,
3018                                                   A->getType());
3019         Rep = Builder.CreateCall(FMA, { A, B, C });
3020       }
3021 
3022       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3023                         IsMask3 ? CI->getArgOperand(2) :
3024                                   CI->getArgOperand(0);
3025 
3026       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3027     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3028                          Name.startswith("fma.vfmsubadd.p"))) {
3029       bool IsSubAdd = Name[7] == 's';
3030       int NumElts = CI->getType()->getVectorNumElements();
3031 
3032       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3033                        CI->getArgOperand(2) };
3034 
3035       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3036                                                 Ops[0]->getType());
3037       Value *Odd = Builder.CreateCall(FMA, Ops);
3038       Ops[2] = Builder.CreateFNeg(Ops[2]);
3039       Value *Even = Builder.CreateCall(FMA, Ops);
3040 
3041       if (IsSubAdd)
3042         std::swap(Even, Odd);
3043 
3044       SmallVector<uint32_t, 32> Idxs(NumElts);
3045       for (int i = 0; i != NumElts; ++i)
3046         Idxs[i] = i + (i % 2) * NumElts;
3047 
3048       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3049     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3050                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3051                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3052                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3053       bool IsMask3 = Name[11] == '3';
3054       bool IsMaskZ = Name[11] == 'z';
3055       // Drop the "avx512.mask." to make it easier.
3056       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3057       bool IsSubAdd = Name[3] == 's';
3058       if (CI->getNumArgOperands() == 5 &&
3059           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3060            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3061         Intrinsic::ID IID;
3062         // Check the character before ".512" in string.
3063         if (Name[Name.size()-5] == 's')
3064           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3065         else
3066           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3067 
3068         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3069                          CI->getArgOperand(2), CI->getArgOperand(4) };
3070         if (IsSubAdd)
3071           Ops[2] = Builder.CreateFNeg(Ops[2]);
3072 
3073         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3074                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3075                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3076       } else {
3077         int NumElts = CI->getType()->getVectorNumElements();
3078 
3079         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3080                          CI->getArgOperand(2) };
3081 
3082         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3083                                                   Ops[0]->getType());
3084         Value *Odd = Builder.CreateCall(FMA, Ops);
3085         Ops[2] = Builder.CreateFNeg(Ops[2]);
3086         Value *Even = Builder.CreateCall(FMA, Ops);
3087 
3088         if (IsSubAdd)
3089           std::swap(Even, Odd);
3090 
3091         SmallVector<uint32_t, 32> Idxs(NumElts);
3092         for (int i = 0; i != NumElts; ++i)
3093           Idxs[i] = i + (i % 2) * NumElts;
3094 
3095         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3096       }
3097 
3098       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3099                         IsMask3 ? CI->getArgOperand(2) :
3100                                   CI->getArgOperand(0);
3101 
3102       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3103     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3104                          Name.startswith("avx512.maskz.pternlog."))) {
3105       bool ZeroMask = Name[11] == 'z';
3106       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3107       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3108       Intrinsic::ID IID;
3109       if (VecWidth == 128 && EltWidth == 32)
3110         IID = Intrinsic::x86_avx512_pternlog_d_128;
3111       else if (VecWidth == 256 && EltWidth == 32)
3112         IID = Intrinsic::x86_avx512_pternlog_d_256;
3113       else if (VecWidth == 512 && EltWidth == 32)
3114         IID = Intrinsic::x86_avx512_pternlog_d_512;
3115       else if (VecWidth == 128 && EltWidth == 64)
3116         IID = Intrinsic::x86_avx512_pternlog_q_128;
3117       else if (VecWidth == 256 && EltWidth == 64)
3118         IID = Intrinsic::x86_avx512_pternlog_q_256;
3119       else if (VecWidth == 512 && EltWidth == 64)
3120         IID = Intrinsic::x86_avx512_pternlog_q_512;
3121       else
3122         llvm_unreachable("Unexpected intrinsic");
3123 
3124       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3125                         CI->getArgOperand(2), CI->getArgOperand(3) };
3126       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3127                                Args);
3128       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3129                                  : CI->getArgOperand(0);
3130       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3131     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3132                          Name.startswith("avx512.maskz.vpmadd52"))) {
3133       bool ZeroMask = Name[11] == 'z';
3134       bool High = Name[20] == 'h' || Name[21] == 'h';
3135       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3136       Intrinsic::ID IID;
3137       if (VecWidth == 128 && !High)
3138         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3139       else if (VecWidth == 256 && !High)
3140         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3141       else if (VecWidth == 512 && !High)
3142         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3143       else if (VecWidth == 128 && High)
3144         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3145       else if (VecWidth == 256 && High)
3146         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3147       else if (VecWidth == 512 && High)
3148         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3149       else
3150         llvm_unreachable("Unexpected intrinsic");
3151 
3152       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3153                         CI->getArgOperand(2) };
3154       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3155                                Args);
3156       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3157                                  : CI->getArgOperand(0);
3158       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3159     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3160                          Name.startswith("avx512.mask.vpermt2var.") ||
3161                          Name.startswith("avx512.maskz.vpermt2var."))) {
3162       bool ZeroMask = Name[11] == 'z';
3163       bool IndexForm = Name[17] == 'i';
3164       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3165       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3166       bool IsFloat = CI->getType()->isFPOrFPVectorTy();
3167       Intrinsic::ID IID;
3168       if (VecWidth == 128 && EltWidth == 32 && IsFloat)
3169         IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
3170       else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
3171         IID = Intrinsic::x86_avx512_vpermi2var_d_128;
3172       else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
3173         IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
3174       else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
3175         IID = Intrinsic::x86_avx512_vpermi2var_q_128;
3176       else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
3177         IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
3178       else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
3179         IID = Intrinsic::x86_avx512_vpermi2var_d_256;
3180       else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
3181         IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
3182       else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
3183         IID = Intrinsic::x86_avx512_vpermi2var_q_256;
3184       else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
3185         IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
3186       else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
3187         IID = Intrinsic::x86_avx512_vpermi2var_d_512;
3188       else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
3189         IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
3190       else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
3191         IID = Intrinsic::x86_avx512_vpermi2var_q_512;
3192       else if (VecWidth == 128 && EltWidth == 16)
3193         IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
3194       else if (VecWidth == 256 && EltWidth == 16)
3195         IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
3196       else if (VecWidth == 512 && EltWidth == 16)
3197         IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
3198       else if (VecWidth == 128 && EltWidth == 8)
3199         IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
3200       else if (VecWidth == 256 && EltWidth == 8)
3201         IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
3202       else if (VecWidth == 512 && EltWidth == 8)
3203         IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
3204       else
3205         llvm_unreachable("Unexpected intrinsic");
3206 
3207       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3208                         CI->getArgOperand(2) };
3209 
3210       // If this isn't index form we need to swap operand 0 and 1.
3211       if (!IndexForm)
3212         std::swap(Args[0], Args[1]);
3213 
3214       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3215                                Args);
3216       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3217                                  : Builder.CreateBitCast(CI->getArgOperand(1),
3218                                                          CI->getType());
3219       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3220     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3221                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3222                          Name.startswith("avx512.mask.vpdpbusds.") ||
3223                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3224       bool ZeroMask = Name[11] == 'z';
3225       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3226       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3227       Intrinsic::ID IID;
3228       if (VecWidth == 128 && !IsSaturating)
3229         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3230       else if (VecWidth == 256 && !IsSaturating)
3231         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3232       else if (VecWidth == 512 && !IsSaturating)
3233         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3234       else if (VecWidth == 128 && IsSaturating)
3235         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3236       else if (VecWidth == 256 && IsSaturating)
3237         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3238       else if (VecWidth == 512 && IsSaturating)
3239         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3240       else
3241         llvm_unreachable("Unexpected intrinsic");
3242 
3243       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3244                         CI->getArgOperand(2)  };
3245       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3246                                Args);
3247       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3248                                  : CI->getArgOperand(0);
3249       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3250     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3251                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3252                          Name.startswith("avx512.mask.vpdpwssds.") ||
3253                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3254       bool ZeroMask = Name[11] == 'z';
3255       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3256       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3257       Intrinsic::ID IID;
3258       if (VecWidth == 128 && !IsSaturating)
3259         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3260       else if (VecWidth == 256 && !IsSaturating)
3261         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3262       else if (VecWidth == 512 && !IsSaturating)
3263         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3264       else if (VecWidth == 128 && IsSaturating)
3265         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3266       else if (VecWidth == 256 && IsSaturating)
3267         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3268       else if (VecWidth == 512 && IsSaturating)
3269         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3270       else
3271         llvm_unreachable("Unexpected intrinsic");
3272 
3273       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3274                         CI->getArgOperand(2)  };
3275       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3276                                Args);
3277       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3278                                  : CI->getArgOperand(0);
3279       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3280     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3281                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3282       // Rep will be updated by the call in the condition.
3283     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3284       Value *Arg = CI->getArgOperand(0);
3285       Value *Neg = Builder.CreateNeg(Arg, "neg");
3286       Value *Cmp = Builder.CreateICmpSGE(
3287           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3288       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3289     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3290                           Name == "max.ui" || Name == "max.ull")) {
3291       Value *Arg0 = CI->getArgOperand(0);
3292       Value *Arg1 = CI->getArgOperand(1);
3293       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3294                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3295                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3296       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3297     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3298                           Name == "min.ui" || Name == "min.ull")) {
3299       Value *Arg0 = CI->getArgOperand(0);
3300       Value *Arg1 = CI->getArgOperand(1);
3301       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3302                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3303                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3304       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3305     } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
3307       Value *Arg = CI->getArgOperand(0);
3308       Value *Ctlz = Builder.CreateCall(
3309           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3310                                     {Arg->getType()}),
3311           {Arg, Builder.getFalse()}, "ctlz");
3312       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3313     } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
      // i64.
3316       Value *Arg = CI->getArgOperand(0);
3317       Value *Popc = Builder.CreateCall(
3318           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3319                                     {Arg->getType()}),
3320           Arg, "ctpop");
3321       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3322     } else if (IsNVVM && Name == "h2f") {
3323       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3324                                    F->getParent(), Intrinsic::convert_from_fp16,
3325                                    {Builder.getFloatTy()}),
3326                                CI->getArgOperand(0), "h2f");
3327     } else {
3328       llvm_unreachable("Unknown function for CallInst upgrade.");
3329     }
3330 
3331     if (Rep)
3332       CI->replaceAllUsesWith(Rep);
3333     CI->eraseFromParent();
3334     return;
3335   }
3336 
3337   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3338     // Handle generic mangling change, but nothing else
3339     assert(
3340         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3341         "Unknown function for CallInst upgrade and isn't just a name change");
3342     CI->setCalledFunction(NewFn);
3343   };
3344   CallInst *NewCall = nullptr;
3345   switch (NewFn->getIntrinsicID()) {
3346   default: {
3347     DefaultCase();
3348     return;
3349   }
3350 
3351   case Intrinsic::arm_neon_vld1:
3352   case Intrinsic::arm_neon_vld2:
3353   case Intrinsic::arm_neon_vld3:
3354   case Intrinsic::arm_neon_vld4:
3355   case Intrinsic::arm_neon_vld2lane:
3356   case Intrinsic::arm_neon_vld3lane:
3357   case Intrinsic::arm_neon_vld4lane:
3358   case Intrinsic::arm_neon_vst1:
3359   case Intrinsic::arm_neon_vst2:
3360   case Intrinsic::arm_neon_vst3:
3361   case Intrinsic::arm_neon_vst4:
3362   case Intrinsic::arm_neon_vst2lane:
3363   case Intrinsic::arm_neon_vst3lane:
3364   case Intrinsic::arm_neon_vst4lane: {
3365     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3366                                  CI->arg_operands().end());
3367     NewCall = Builder.CreateCall(NewFn, Args);
3368     break;
3369   }
3370 
3371   case Intrinsic::bitreverse:
3372     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3373     break;
3374 
3375   case Intrinsic::ctlz:
3376   case Intrinsic::cttz:
3377     assert(CI->getNumArgOperands() == 1 &&
3378            "Mismatch between function args and call args");
3379     NewCall =
3380         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3381     break;
3382 
3383   case Intrinsic::objectsize: {
3384     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3385                                    ? Builder.getFalse()
3386                                    : CI->getArgOperand(2);
3387     NewCall = Builder.CreateCall(
3388         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
3389     break;
3390   }
3391 
3392   case Intrinsic::ctpop:
3393     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3394     break;
3395 
3396   case Intrinsic::convert_from_fp16:
3397     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3398     break;
3399 
3400   case Intrinsic::dbg_value:
3401     // Upgrade from the old version that had an extra offset argument.
3402     assert(CI->getNumArgOperands() == 4);
3403     // Drop nonzero offsets instead of attempting to upgrade them.
3404     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3405       if (Offset->isZeroValue()) {
3406         NewCall = Builder.CreateCall(
3407             NewFn,
3408             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3409         break;
3410       }
3411     CI->eraseFromParent();
3412     return;
3413 
3414   case Intrinsic::x86_xop_vfrcz_ss:
3415   case Intrinsic::x86_xop_vfrcz_sd:
3416     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3417     break;
3418 
3419   case Intrinsic::x86_xop_vpermil2pd:
3420   case Intrinsic::x86_xop_vpermil2ps:
3421   case Intrinsic::x86_xop_vpermil2pd_256:
3422   case Intrinsic::x86_xop_vpermil2ps_256: {
3423     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3424                                  CI->arg_operands().end());
3425     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3426     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3427     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3428     NewCall = Builder.CreateCall(NewFn, Args);
3429     break;
3430   }
3431 
3432   case Intrinsic::x86_sse41_ptestc:
3433   case Intrinsic::x86_sse41_ptestz:
3434   case Intrinsic::x86_sse41_ptestnzc: {
3435     // The arguments for these intrinsics used to be v4f32, and changed
3436     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3437     // So, the only thing required is a bitcast for both arguments.
3438     // First, check the arguments have the old type.
3439     Value *Arg0 = CI->getArgOperand(0);
3440     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3441       return;
3442 
3443     // Old intrinsic, add bitcasts
3444     Value *Arg1 = CI->getArgOperand(1);
3445 
3446     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3447 
3448     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3449     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3450 
3451     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3452     break;
3453   }
3454 
3455   case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
3458     if (CI->getNumOperands() == 0)
3459       return;
3460 
3461     NewCall = Builder.CreateCall(NewFn);
3462     // Extract the second result and store it.
3463     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3464     // Cast the pointer to the right type.
3465     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3466                                  llvm::PointerType::getUnqual(Data->getType()));
3467     Builder.CreateAlignedStore(Data, Ptr, 1);
3468     // Replace the original call result with the first result of the new call.
3469     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3470 
3471     std::string Name = CI->getName();
3472     if (!Name.empty()) {
3473       CI->setName(Name + ".old");
3474       NewCall->setName(Name);
3475     }
3476     CI->replaceAllUsesWith(TSC);
3477     CI->eraseFromParent();
3478     return;
3479   }
3480 
3481   case Intrinsic::x86_addcarryx_u32:
3482   case Intrinsic::x86_addcarryx_u64:
3483   case Intrinsic::x86_addcarry_u32:
3484   case Intrinsic::x86_addcarry_u64:
3485   case Intrinsic::x86_subborrow_u32:
3486   case Intrinsic::x86_subborrow_u64: {
    // This used to take 4 arguments. If we only have 3 arguments it's already
    // upgraded.
3489     if (CI->getNumOperands() == 3)
3490       return;
3491 
3492     // Make a call with 3 operands.
3493     NewCall = Builder.CreateCall(NewFn, { CI->getArgOperand(0),
3494                                           CI->getArgOperand(1),
3495                                           CI->getArgOperand(2)});
3496     // Extract the second result and store it.
3497     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3498     // Cast the pointer to the right type.
3499     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3500                                  llvm::PointerType::getUnqual(Data->getType()));
3501     Builder.CreateAlignedStore(Data, Ptr, 1);
3502     // Replace the original call result with the first result of the new call.
3503     Value *CF = Builder.CreateExtractValue(NewCall, 0);
3504 
3505     std::string Name = CI->getName();
3506     if (!Name.empty()) {
3507       CI->setName(Name + ".old");
3508       NewCall->setName(Name);
3509     }
3510     CI->replaceAllUsesWith(CF);
3511     CI->eraseFromParent();
3512     return;
3513   }
3514 
3515   case Intrinsic::x86_sse41_insertps:
3516   case Intrinsic::x86_sse41_dppd:
3517   case Intrinsic::x86_sse41_dpps:
3518   case Intrinsic::x86_sse41_mpsadbw:
3519   case Intrinsic::x86_avx_dp_ps_256:
3520   case Intrinsic::x86_avx2_mpsadbw: {
3521     // Need to truncate the last argument from i32 to i8 -- this argument models
3522     // an inherently 8-bit immediate operand to these x86 instructions.
3523     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3524                                  CI->arg_operands().end());
3525 
3526     // Replace the last argument with a trunc.
3527     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3528     NewCall = Builder.CreateCall(NewFn, Args);
3529     break;
3530   }
3531 
3532   case Intrinsic::thread_pointer: {
3533     NewCall = Builder.CreateCall(NewFn, {});
3534     break;
3535   }
3536 
3537   case Intrinsic::invariant_start:
3538   case Intrinsic::invariant_end:
3539   case Intrinsic::masked_load:
3540   case Intrinsic::masked_store:
3541   case Intrinsic::masked_gather:
3542   case Intrinsic::masked_scatter: {
3543     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3544                                  CI->arg_operands().end());
3545     NewCall = Builder.CreateCall(NewFn, Args);
3546     break;
3547   }
3548 
3549   case Intrinsic::memcpy:
3550   case Intrinsic::memmove:
3551   case Intrinsic::memset: {
3552     // We have to make sure that the call signature is what we're expecting.
3553     // We only want to change the old signatures by removing the alignment arg:
3554     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3555     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3556     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3557     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3558     // Note: i8*'s in the above can be any pointer type
3559     if (CI->getNumArgOperands() != 5) {
3560       DefaultCase();
3561       return;
3562     }
3563     // Remove alignment argument (3), and add alignment attributes to the
3564     // dest/src pointers.
3565     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3566                       CI->getArgOperand(2), CI->getArgOperand(4)};
3567     NewCall = Builder.CreateCall(NewFn, Args);
3568     auto *MemCI = cast<MemIntrinsic>(NewCall);
3569     // All mem intrinsics support dest alignment.
3570     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3571     MemCI->setDestAlignment(Align->getZExtValue());
3572     // Memcpy/Memmove also support source alignment.
3573     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3574       MTI->setSourceAlignment(Align->getZExtValue());
3575     break;
3576   }
3577   }
3578   assert(NewCall && "Should have either set this variable or returned through "
3579                     "the default case");
3580   std::string Name = CI->getName();
3581   if (!Name.empty()) {
3582     CI->setName(Name + ".old");
3583     NewCall->setName(Name);
3584   }
3585   CI->replaceAllUsesWith(NewCall);
3586   CI->eraseFromParent();
3587 }
3588 
3589 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3590   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3591 
3592   // Check if this function should be upgraded and get the replacement function
3593   // if there is one.
3594   Function *NewFn;
3595   if (UpgradeIntrinsicFunction(F, NewFn)) {
3596     // Replace all users of the old function with the new function or new
3597     // instructions. This is not a range loop because the call is deleted.
3598     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3599       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3600         UpgradeIntrinsicCall(CI, NewFn);
3601 
3602     // Remove old function, no longer used, from the module.
3603     F->eraseFromParent();
3604   }
3605 }
3606 
3607 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3608   // Check if the tag uses struct-path aware TBAA format.
3609   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3610     return &MD;
3611 
3612   auto &Context = MD.getContext();
3613   if (MD.getNumOperands() == 3) {
3614     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3615     MDNode *ScalarType = MDNode::get(Context, Elts);
3616     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3617     Metadata *Elts2[] = {ScalarType, ScalarType,
3618                          ConstantAsMetadata::get(
3619                              Constant::getNullValue(Type::getInt64Ty(Context))),
3620                          MD.getOperand(2)};
3621     return MDNode::get(Context, Elts2);
3622   }
3623   // Create a MDNode <MD, MD, offset 0>
3624   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3625                                     Type::getInt64Ty(Context)))};
3626   return MDNode::get(Context, Elts);
3627 }
3628 
3629 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3630                                       Instruction *&Temp) {
3631   if (Opc != Instruction::BitCast)
3632     return nullptr;
3633 
3634   Temp = nullptr;
3635   Type *SrcTy = V->getType();
3636   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3637       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3638     LLVMContext &Context = V->getContext();
3639 
3640     // We have no information about target data layout, so we assume that
3641     // the maximum pointer size is 64bit.
3642     Type *MidTy = Type::getInt64Ty(Context);
3643     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3644 
3645     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3646   }
3647 
3648   return nullptr;
3649 }
3650 
3651 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3652   if (Opc != Instruction::BitCast)
3653     return nullptr;
3654 
3655   Type *SrcTy = C->getType();
3656   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3657       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3658     LLVMContext &Context = C->getContext();
3659 
3660     // We have no information about target data layout, so we assume that
3661     // the maximum pointer size is 64bit.
3662     Type *MidTy = Type::getInt64Ty(Context);
3663 
3664     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3665                                      DestTy);
3666   }
3667 
3668   return nullptr;
3669 }
3670 
3671 /// Check the debug info version number, if it is out-dated, drop the debug
3672 /// info. Return true if module is modified.
3673 bool llvm::UpgradeDebugInfo(Module &M) {
3674   unsigned Version = getDebugMetadataVersionFromModule(M);
3675   if (Version == DEBUG_METADATA_VERSION) {
3676     bool BrokenDebugInfo = false;
3677     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3678       report_fatal_error("Broken module found, compilation aborted!");
3679     if (!BrokenDebugInfo)
3680       // Everything is ok.
3681       return false;
3682     else {
3683       // Diagnose malformed debug info.
3684       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3685       M.getContext().diagnose(Diag);
3686     }
3687   }
3688   bool Modified = StripDebugInfo(M);
3689   if (Modified && Version != DEBUG_METADATA_VERSION) {
3690     // Diagnose a version mismatch.
3691     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3692     M.getContext().diagnose(DiagVersion);
3693   }
3694   return Modified;
3695 }
3696 
3697 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3698   bool Changed = false;
3699   NamedMDNode *ModRetainReleaseMarker =
3700       M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3701   if (ModRetainReleaseMarker) {
3702     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3703     if (Op) {
3704       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3705       if (ID) {
3706         SmallVector<StringRef, 4> ValueComp;
3707         ID->getString().split(ValueComp, "#");
3708         if (ValueComp.size() == 2) {
3709           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3710           Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3711           ModRetainReleaseMarker->setOperand(0,
3712                                              MDNode::get(M.getContext(), Ops));
3713           Changed = true;
3714         }
3715       }
3716     }
3717   }
3718   return Changed;
3719 }
3720 
3721 bool llvm::UpgradeModuleFlags(Module &M) {
3722   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3723   if (!ModFlags)
3724     return false;
3725 
3726   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3727   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3728     MDNode *Op = ModFlags->getOperand(I);
3729     if (Op->getNumOperands() != 3)
3730       continue;
3731     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3732     if (!ID)
3733       continue;
3734     if (ID->getString() == "Objective-C Image Info Version")
3735       HasObjCFlag = true;
3736     if (ID->getString() == "Objective-C Class Properties")
3737       HasClassProperties = true;
3738     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3739     // field was Error and now they are Max.
3740     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3741       if (auto *Behavior =
3742               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3743         if (Behavior->getLimitedValue() == Module::Error) {
3744           Type *Int32Ty = Type::getInt32Ty(M.getContext());
3745           Metadata *Ops[3] = {
3746               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3747               MDString::get(M.getContext(), ID->getString()),
3748               Op->getOperand(2)};
3749           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3750           Changed = true;
3751         }
3752       }
3753     }
3754     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3755     // section name so that llvm-lto will not complain about mismatching
3756     // module flags that is functionally the same.
3757     if (ID->getString() == "Objective-C Image Info Section") {
3758       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3759         SmallVector<StringRef, 4> ValueComp;
3760         Value->getString().split(ValueComp, " ");
3761         if (ValueComp.size() != 1) {
3762           std::string NewValue;
3763           for (auto &S : ValueComp)
3764             NewValue += S.str();
3765           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3766                               MDString::get(M.getContext(), NewValue)};
3767           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3768           Changed = true;
3769         }
3770       }
3771     }
3772   }
3773 
3774   // "Objective-C Class Properties" is recently added for Objective-C. We
3775   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
3776   // flag of value 0, so we can correclty downgrade this flag when trying to
3777   // link an ObjC bitcode without this module flag with an ObjC bitcode with
3778   // this module flag.
3779   if (HasObjCFlag && !HasClassProperties) {
3780     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3781                     (uint32_t)0);
3782     Changed = true;
3783   }
3784 
3785   return Changed;
3786 }
3787 
3788 void llvm::UpgradeSectionAttributes(Module &M) {
3789   auto TrimSpaces = [](StringRef Section) -> std::string {
3790     SmallVector<StringRef, 5> Components;
3791     Section.split(Components, ',');
3792 
3793     SmallString<32> Buffer;
3794     raw_svector_ostream OS(Buffer);
3795 
3796     for (auto Component : Components)
3797       OS << ',' << Component.trim();
3798 
3799     return OS.str().substr(1);
3800   };
3801 
3802   for (auto &GV : M.globals()) {
3803     if (!GV.hasSection())
3804       continue;
3805 
3806     StringRef Section = GV.getSection();
3807 
3808     if (!Section.startswith("__DATA, __objc_catlist"))
3809       continue;
3810 
3811     // __DATA, __objc_catlist, regular, no_dead_strip
3812     // __DATA,__objc_catlist,regular,no_dead_strip
3813     GV.setSection(TrimSpaces(Section));
3814   }
3815 }
3816 
3817 static bool isOldLoopArgument(Metadata *MD) {
3818   auto *T = dyn_cast_or_null<MDTuple>(MD);
3819   if (!T)
3820     return false;
3821   if (T->getNumOperands() < 1)
3822     return false;
3823   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3824   if (!S)
3825     return false;
3826   return S->getString().startswith("llvm.vectorizer.");
3827 }
3828 
3829 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3830   StringRef OldPrefix = "llvm.vectorizer.";
3831   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3832 
3833   if (OldTag == "llvm.vectorizer.unroll")
3834     return MDString::get(C, "llvm.loop.interleave.count");
3835 
3836   return MDString::get(
3837       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3838              .str());
3839 }
3840 
3841 static Metadata *upgradeLoopArgument(Metadata *MD) {
3842   auto *T = dyn_cast_or_null<MDTuple>(MD);
3843   if (!T)
3844     return MD;
3845   if (T->getNumOperands() < 1)
3846     return MD;
3847   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3848   if (!OldTag)
3849     return MD;
3850   if (!OldTag->getString().startswith("llvm.vectorizer."))
3851     return MD;
3852 
3853   // This has an old tag.  Upgrade it.
3854   SmallVector<Metadata *, 8> Ops;
3855   Ops.reserve(T->getNumOperands());
3856   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3857   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3858     Ops.push_back(T->getOperand(I));
3859 
3860   return MDTuple::get(T->getContext(), Ops);
3861 }
3862 
3863 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3864   auto *T = dyn_cast<MDTuple>(&N);
3865   if (!T)
3866     return &N;
3867 
3868   if (none_of(T->operands(), isOldLoopArgument))
3869     return &N;
3870 
3871   SmallVector<Metadata *, 8> Ops;
3872   Ops.reserve(T->getNumOperands());
3873   for (Metadata *MD : T->operands())
3874     Ops.push_back(upgradeLoopArgument(MD));
3875 
3876   return MDTuple::get(T->getContext(), Ops);
3877 }
3878