1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
30 #include <cstring>
31 using namespace llvm;
32 
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
34 
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
38                                   Function *&NewFn) {
39   // Check whether this is an old version of the function, which received
40   // v4f32 arguments.
41   Type *Arg0Type = F->getFunctionType()->getParamType(0);
42   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43     return false;
44 
45   // Yes, it's old, replace it with new version.
46   rename(F);
47   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
48   return true;
49 }
50 
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54                                              Function *&NewFn) {
55   // Check that the last argument is an i32.
56   Type *LastArgType = F->getFunctionType()->getParamType(
57      F->getFunctionType()->getNumParams() - 1);
58   if (!LastArgType->isIntegerTy(32))
59     return false;
60 
61   // Move this function aside and map down.
62   rename(F);
63   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
64   return true;
65 }
66 
67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
68   // All of the intrinsics matches below should be marked with which llvm
69   // version started autoupgrading them. At some point in the future we would
70   // like to use this information to remove upgrade code for some older
71   // intrinsics. It is currently undecided how we will determine that future
72   // point.
73   if (Name == "addcarryx.u32" || // Added in 8.0
74       Name == "addcarryx.u64" || // Added in 8.0
75       Name == "addcarry.u32" || // Added in 8.0
76       Name == "addcarry.u64" || // Added in 8.0
77       Name == "subborrow.u32" || // Added in 8.0
78       Name == "subborrow.u64" || // Added in 8.0
79       Name.startswith("sse2.padds.") || // Added in 8.0
80       Name.startswith("sse2.psubs.") || // Added in 8.0
81       Name.startswith("sse2.paddus.") || // Added in 8.0
82       Name.startswith("sse2.psubus.") || // Added in 8.0
83       Name.startswith("avx2.padds.") || // Added in 8.0
84       Name.startswith("avx2.psubs.") || // Added in 8.0
85       Name.startswith("avx2.paddus.") || // Added in 8.0
86       Name.startswith("avx2.psubus.") || // Added in 8.0
87       Name.startswith("avx512.padds.") || // Added in 8.0
88       Name.startswith("avx512.psubs.") || // Added in 8.0
89       Name.startswith("avx512.mask.padds.") || // Added in 8.0
90       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
91       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
92       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
93       Name=="ssse3.pabs.b.128" || // Added in 6.0
94       Name=="ssse3.pabs.w.128" || // Added in 6.0
95       Name=="ssse3.pabs.d.128" || // Added in 6.0
96       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
97       Name.startswith("fma.vfmadd.") || // Added in 7.0
98       Name.startswith("fma.vfmsub.") || // Added in 7.0
99       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
100       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
101       Name.startswith("fma.vfnmadd.") || // Added in 7.0
102       Name.startswith("fma.vfnmsub.") || // Added in 7.0
103       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
104       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
105       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
106       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
107       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
108       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
109       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
110       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
111       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
112       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
113       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
114       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
115       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
116       Name.startswith("avx512.kunpck") || //added in 6.0
117       Name.startswith("avx2.pabs.") || // Added in 6.0
118       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
119       Name.startswith("avx512.broadcastm") || // Added in 6.0
120       Name == "sse.sqrt.ss" || // Added in 7.0
121       Name == "sse2.sqrt.sd" || // Added in 7.0
122       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
123       Name.startswith("avx.sqrt.p") || // Added in 7.0
124       Name.startswith("sse2.sqrt.p") || // Added in 7.0
125       Name.startswith("sse.sqrt.p") || // Added in 7.0
126       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
127       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
128       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
129       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
130       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
131       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
132       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
133       Name.startswith("avx.vperm2f128.") || // Added in 6.0
134       Name == "avx2.vperm2i128" || // Added in 6.0
135       Name == "sse.add.ss" || // Added in 4.0
136       Name == "sse2.add.sd" || // Added in 4.0
137       Name == "sse.sub.ss" || // Added in 4.0
138       Name == "sse2.sub.sd" || // Added in 4.0
139       Name == "sse.mul.ss" || // Added in 4.0
140       Name == "sse2.mul.sd" || // Added in 4.0
141       Name == "sse.div.ss" || // Added in 4.0
142       Name == "sse2.div.sd" || // Added in 4.0
143       Name == "sse41.pmaxsb" || // Added in 3.9
144       Name == "sse2.pmaxs.w" || // Added in 3.9
145       Name == "sse41.pmaxsd" || // Added in 3.9
146       Name == "sse2.pmaxu.b" || // Added in 3.9
147       Name == "sse41.pmaxuw" || // Added in 3.9
148       Name == "sse41.pmaxud" || // Added in 3.9
149       Name == "sse41.pminsb" || // Added in 3.9
150       Name == "sse2.pmins.w" || // Added in 3.9
151       Name == "sse41.pminsd" || // Added in 3.9
152       Name == "sse2.pminu.b" || // Added in 3.9
153       Name == "sse41.pminuw" || // Added in 3.9
154       Name == "sse41.pminud" || // Added in 3.9
155       Name == "avx512.kand.w" || // Added in 7.0
156       Name == "avx512.kandn.w" || // Added in 7.0
157       Name == "avx512.knot.w" || // Added in 7.0
158       Name == "avx512.kor.w" || // Added in 7.0
159       Name == "avx512.kxor.w" || // Added in 7.0
160       Name == "avx512.kxnor.w" || // Added in 7.0
161       Name == "avx512.kortestc.w" || // Added in 7.0
162       Name == "avx512.kortestz.w" || // Added in 7.0
163       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
164       Name.startswith("avx2.pmax") || // Added in 3.9
165       Name.startswith("avx2.pmin") || // Added in 3.9
166       Name.startswith("avx512.mask.pmax") || // Added in 4.0
167       Name.startswith("avx512.mask.pmin") || // Added in 4.0
168       Name.startswith("avx2.vbroadcast") || // Added in 3.8
169       Name.startswith("avx2.pbroadcast") || // Added in 3.8
170       Name.startswith("avx.vpermil.") || // Added in 3.1
171       Name.startswith("sse2.pshuf") || // Added in 3.9
172       Name.startswith("avx512.pbroadcast") || // Added in 3.9
173       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
174       Name.startswith("avx512.mask.movddup") || // Added in 3.9
175       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
176       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
177       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
178       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
179       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
180       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
181       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
182       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
183       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
184       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
185       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
186       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
187       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
188       Name.startswith("avx512.mask.pand.") || // Added in 3.9
189       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
190       Name.startswith("avx512.mask.por.") || // Added in 3.9
191       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
192       Name.startswith("avx512.mask.and.") || // Added in 3.9
193       Name.startswith("avx512.mask.andn.") || // Added in 3.9
194       Name.startswith("avx512.mask.or.") || // Added in 3.9
195       Name.startswith("avx512.mask.xor.") || // Added in 3.9
196       Name.startswith("avx512.mask.padd.") || // Added in 4.0
197       Name.startswith("avx512.mask.psub.") || // Added in 4.0
198       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
199       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
200       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
201       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
202       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
203       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
204       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
205       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
206       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
207       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
208       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
209       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
210       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
211       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
212       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
213       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
214       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
215       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
216       Name == "avx512.cvtusi2sd" || // Added in 7.0
217       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
218       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
219       Name == "sse2.pmulu.dq" || // Added in 7.0
220       Name == "sse41.pmuldq" || // Added in 7.0
221       Name == "avx2.pmulu.dq" || // Added in 7.0
222       Name == "avx2.pmul.dq" || // Added in 7.0
223       Name == "avx512.pmulu.dq.512" || // Added in 7.0
224       Name == "avx512.pmul.dq.512" || // Added in 7.0
225       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
226       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
227       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
228       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
229       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
230       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
231       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
232       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
233       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
234       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
235       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
236       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
237       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
238       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
239       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
240       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
241       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
242       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
243       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
244       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
245       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
246       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
247       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
248       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
249       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
250       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
251       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
252       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
253       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
254       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
255       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
256       Name.startswith("avx512.mask.pslli") || // Added in 4.0
257       Name.startswith("avx512.mask.psrai") || // Added in 4.0
258       Name.startswith("avx512.mask.psrli") || // Added in 4.0
259       Name.startswith("avx512.mask.psllv") || // Added in 4.0
260       Name.startswith("avx512.mask.psrav") || // Added in 4.0
261       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
262       Name.startswith("sse41.pmovsx") || // Added in 3.8
263       Name.startswith("sse41.pmovzx") || // Added in 3.9
264       Name.startswith("avx2.pmovsx") || // Added in 3.9
265       Name.startswith("avx2.pmovzx") || // Added in 3.9
266       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
267       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
268       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
269       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
270       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
271       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
272       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
273       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
274       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
275       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
276       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
277       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
278       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
279       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
280       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
281       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
282       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
283       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
284       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
285       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
286       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
287       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
288       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
289       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
290       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
291       Name.startswith("avx512.vpshld.") || // Added in 8.0
292       Name.startswith("avx512.vpshrd.") || // Added in 8.0
293       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
294       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
295       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
296       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
297       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
298       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
299       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
300       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
301       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
302       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
303       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
304       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
305       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
306       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
307       Name == "sse.cvtsi2ss" || // Added in 7.0
308       Name == "sse.cvtsi642ss" || // Added in 7.0
309       Name == "sse2.cvtsi2sd" || // Added in 7.0
310       Name == "sse2.cvtsi642sd" || // Added in 7.0
311       Name == "sse2.cvtss2sd" || // Added in 7.0
312       Name == "sse2.cvtdq2pd" || // Added in 3.9
313       Name == "sse2.cvtdq2ps" || // Added in 7.0
314       Name == "sse2.cvtps2pd" || // Added in 3.9
315       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
316       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
317       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
318       Name.startswith("avx.vinsertf128.") || // Added in 3.7
319       Name == "avx2.vinserti128" || // Added in 3.7
320       Name.startswith("avx512.mask.insert") || // Added in 4.0
321       Name.startswith("avx.vextractf128.") || // Added in 3.7
322       Name == "avx2.vextracti128" || // Added in 3.7
323       Name.startswith("avx512.mask.vextract") || // Added in 4.0
324       Name.startswith("sse4a.movnt.") || // Added in 3.9
325       Name.startswith("avx.movnt.") || // Added in 3.2
326       Name.startswith("avx512.storent.") || // Added in 3.9
327       Name == "sse41.movntdqa" || // Added in 5.0
328       Name == "avx2.movntdqa" || // Added in 5.0
329       Name == "avx512.movntdqa" || // Added in 5.0
330       Name == "sse2.storel.dq" || // Added in 3.9
331       Name.startswith("sse.storeu.") || // Added in 3.9
332       Name.startswith("sse2.storeu.") || // Added in 3.9
333       Name.startswith("avx.storeu.") || // Added in 3.9
334       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
335       Name.startswith("avx512.mask.store.p") || // Added in 3.9
336       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
337       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
338       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
339       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
340       Name == "avx512.mask.store.ss" || // Added in 7.0
341       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
342       Name.startswith("avx512.mask.load.") || // Added in 3.9
343       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
344       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
345       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
346       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
347       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
348       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
349       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
350       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
351       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
352       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
353       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
354       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
355       Name == "sse42.crc32.64.8" || // Added in 3.4
356       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
357       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
358       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
359       Name.startswith("avx512.mask.valign.") || // Added in 4.0
360       Name.startswith("sse2.psll.dq") || // Added in 3.7
361       Name.startswith("sse2.psrl.dq") || // Added in 3.7
362       Name.startswith("avx2.psll.dq") || // Added in 3.7
363       Name.startswith("avx2.psrl.dq") || // Added in 3.7
364       Name.startswith("avx512.psll.dq") || // Added in 3.9
365       Name.startswith("avx512.psrl.dq") || // Added in 3.9
366       Name == "sse41.pblendw" || // Added in 3.7
367       Name.startswith("sse41.blendp") || // Added in 3.7
368       Name.startswith("avx.blend.p") || // Added in 3.7
369       Name == "avx2.pblendw" || // Added in 3.7
370       Name.startswith("avx2.pblendd.") || // Added in 3.7
371       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
372       Name == "avx2.vbroadcasti128" || // Added in 3.7
373       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
374       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
375       Name == "xop.vpcmov" || // Added in 3.8
376       Name == "xop.vpcmov.256" || // Added in 5.0
377       Name.startswith("avx512.mask.move.s") || // Added in 4.0
378       Name.startswith("avx512.cvtmask2") || // Added in 5.0
379       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
380       Name.startswith("xop.vprot") || // Added in 8.0
381       Name.startswith("avx512.prol") || // Added in 8.0
382       Name.startswith("avx512.pror") || // Added in 8.0
383       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
384       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
385       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
386       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
387       Name.startswith("avx512.ptestm") || //Added in 6.0
388       Name.startswith("avx512.ptestnm") || //Added in 6.0
389       Name.startswith("sse2.pavg") || // Added in 6.0
390       Name.startswith("avx2.pavg") || // Added in 6.0
391       Name.startswith("avx512.mask.pavg")) // Added in 6.0
392     return true;
393 
394   return false;
395 }
396 
// Decide whether the x86 intrinsic declaration F (whose name, with the
// leading "llvm." already stripped by the caller, is given in Name) needs
// auto-upgrading. Returns true if so. On a true return, NewFn is either the
// replacement intrinsic declaration, or null when only the call sites need
// rewriting and no new declaration is required.
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  // Intrinsics upgraded purely by rewriting their call sites: signal that
  // with a null NewFn.
  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  // Name.substr(11) is whatever follows the "sse41.ptest" prefix.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  // The old form took two operands; the current form takes one.
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      // Select the integer-index replacement from the element width
      // (pd vs. ps) and the total vector width (128 vs. 256 bits).
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  // x86.seh.recoverfp was generalized to the target-independent
  // eh.recoverfp intrinsic.
  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}
492 
493 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
494   assert(F && "Illegal to upgrade a non-existent Function.");
495 
496   // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
497   if (F->getName() == "clang.arc.use") {
498     NewFn = nullptr;
499     return true;
500   }
501 
502   // Quickly eliminate it, if it's not a candidate.
503   StringRef Name = F->getName();
504   if (Name.size() <= 8 || !Name.startswith("llvm."))
505     return false;
506   Name = Name.substr(5); // Strip off "llvm."
507 
508   switch (Name[0]) {
509   default: break;
510   case 'a': {
511     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
512       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
513                                         F->arg_begin()->getType());
514       return true;
515     }
516     if (Name.startswith("arm.neon.vclz")) {
517       Type* args[2] = {
518         F->arg_begin()->getType(),
519         Type::getInt1Ty(F->getContext())
520       };
521       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
522       // the end of the name. Change name from llvm.arm.neon.vclz.* to
523       //  llvm.ctlz.*
524       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
525       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
526                                "llvm.ctlz." + Name.substr(14), F->getParent());
527       return true;
528     }
529     if (Name.startswith("arm.neon.vcnt")) {
530       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
531                                         F->arg_begin()->getType());
532       return true;
533     }
534     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
535     if (vldRegex.match(Name)) {
536       auto fArgs = F->getFunctionType()->params();
537       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
538       // Can't use Intrinsic::getDeclaration here as the return types might
539       // then only be structurally equal.
540       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
541       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
542                                "llvm." + Name + ".p0i8", F->getParent());
543       return true;
544     }
545     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
546     if (vstRegex.match(Name)) {
547       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
548                                                 Intrinsic::arm_neon_vst2,
549                                                 Intrinsic::arm_neon_vst3,
550                                                 Intrinsic::arm_neon_vst4};
551 
552       static const Intrinsic::ID StoreLaneInts[] = {
553         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
554         Intrinsic::arm_neon_vst4lane
555       };
556 
557       auto fArgs = F->getFunctionType()->params();
558       Type *Tys[] = {fArgs[0], fArgs[1]};
559       if (Name.find("lane") == StringRef::npos)
560         NewFn = Intrinsic::getDeclaration(F->getParent(),
561                                           StoreInts[fArgs.size() - 3], Tys);
562       else
563         NewFn = Intrinsic::getDeclaration(F->getParent(),
564                                           StoreLaneInts[fArgs.size() - 5], Tys);
565       return true;
566     }
567     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
568       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
569       return true;
570     }
571     if (Name.startswith("aarch64.neon.addp")) {
572       if (F->arg_size() != 2)
573         break; // Invalid IR.
574       auto fArgs = F->getFunctionType()->params();
575       VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
576       if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
577         NewFn = Intrinsic::getDeclaration(F->getParent(),
578                                           Intrinsic::aarch64_neon_faddp, fArgs);
579         return true;
580       }
581     }
582     break;
583   }
584 
585   case 'c': {
586     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
587       rename(F);
588       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
589                                         F->arg_begin()->getType());
590       return true;
591     }
592     if (Name.startswith("cttz.") && F->arg_size() == 1) {
593       rename(F);
594       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
595                                         F->arg_begin()->getType());
596       return true;
597     }
598     break;
599   }
600   case 'd': {
601     if (Name == "dbg.value" && F->arg_size() == 4) {
602       rename(F);
603       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
604       return true;
605     }
606     break;
607   }
608   case 'i':
609   case 'l': {
610     bool IsLifetimeStart = Name.startswith("lifetime.start");
611     if (IsLifetimeStart || Name.startswith("invariant.start")) {
612       Intrinsic::ID ID = IsLifetimeStart ?
613         Intrinsic::lifetime_start : Intrinsic::invariant_start;
614       auto Args = F->getFunctionType()->params();
615       Type* ObjectPtr[1] = {Args[1]};
616       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
617         rename(F);
618         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
619         return true;
620       }
621     }
622 
623     bool IsLifetimeEnd = Name.startswith("lifetime.end");
624     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
625       Intrinsic::ID ID = IsLifetimeEnd ?
626         Intrinsic::lifetime_end : Intrinsic::invariant_end;
627 
628       auto Args = F->getFunctionType()->params();
629       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
630       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
631         rename(F);
632         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
633         return true;
634       }
635     }
636     if (Name.startswith("invariant.group.barrier")) {
637       // Rename invariant.group.barrier to launder.invariant.group
638       auto Args = F->getFunctionType()->params();
639       Type* ObjectPtr[1] = {Args[0]};
640       rename(F);
641       NewFn = Intrinsic::getDeclaration(F->getParent(),
642           Intrinsic::launder_invariant_group, ObjectPtr);
643       return true;
644 
645     }
646 
647     break;
648   }
649   case 'm': {
650     if (Name.startswith("masked.load.")) {
651       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
652       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
653         rename(F);
654         NewFn = Intrinsic::getDeclaration(F->getParent(),
655                                           Intrinsic::masked_load,
656                                           Tys);
657         return true;
658       }
659     }
660     if (Name.startswith("masked.store.")) {
661       auto Args = F->getFunctionType()->params();
662       Type *Tys[] = { Args[0], Args[1] };
663       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
664         rename(F);
665         NewFn = Intrinsic::getDeclaration(F->getParent(),
666                                           Intrinsic::masked_store,
667                                           Tys);
668         return true;
669       }
670     }
671     // Renaming gather/scatter intrinsics with no address space overloading
672     // to the new overload which includes an address space
673     if (Name.startswith("masked.gather.")) {
674       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
675       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
676         rename(F);
677         NewFn = Intrinsic::getDeclaration(F->getParent(),
678                                           Intrinsic::masked_gather, Tys);
679         return true;
680       }
681     }
682     if (Name.startswith("masked.scatter.")) {
683       auto Args = F->getFunctionType()->params();
684       Type *Tys[] = {Args[0], Args[1]};
685       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
686         rename(F);
687         NewFn = Intrinsic::getDeclaration(F->getParent(),
688                                           Intrinsic::masked_scatter, Tys);
689         return true;
690       }
691     }
692     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
693     // alignment parameter to embedding the alignment as an attribute of
694     // the pointer args.
695     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
696       rename(F);
697       // Get the types of dest, src, and len
698       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
699       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
700                                         ParamTypes);
701       return true;
702     }
703     if (Name.startswith("memmove.") && F->arg_size() == 5) {
704       rename(F);
705       // Get the types of dest, src, and len
706       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
707       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
708                                         ParamTypes);
709       return true;
710     }
711     if (Name.startswith("memset.") && F->arg_size() == 5) {
712       rename(F);
713       // Get the types of dest, and len
714       const auto *FT = F->getFunctionType();
715       Type *ParamTypes[2] = {
716           FT->getParamType(0), // Dest
717           FT->getParamType(2)  // len
718       };
719       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
720                                         ParamTypes);
721       return true;
722     }
723     break;
724   }
725   case 'n': {
726     if (Name.startswith("nvvm.")) {
727       Name = Name.substr(5);
728 
729       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
730       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
731                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
732                               .Case("clz.i", Intrinsic::ctlz)
733                               .Case("popc.i", Intrinsic::ctpop)
734                               .Default(Intrinsic::not_intrinsic);
735       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
736         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
737                                           {F->getReturnType()});
738         return true;
739       }
740 
741       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
742       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
743       //
744       // TODO: We could add lohi.i2d.
745       bool Expand = StringSwitch<bool>(Name)
746                         .Cases("abs.i", "abs.ll", true)
747                         .Cases("clz.ll", "popc.ll", "h2f", true)
748                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
749                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
750                         .Default(false);
751       if (Expand) {
752         NewFn = nullptr;
753         return true;
754       }
755     }
756     break;
757   }
758   case 'o':
759     // We only need to change the name to match the mangling including the
760     // address space.
761     if (Name.startswith("objectsize.")) {
762       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
763       if (F->arg_size() == 2 || F->arg_size() == 3 ||
764           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
765         rename(F);
766         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
767                                           Tys);
768         return true;
769       }
770     }
771     break;
772 
773   case 's':
774     if (Name == "stackprotectorcheck") {
775       NewFn = nullptr;
776       return true;
777     }
778     break;
779 
780   case 'x':
781     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
782       return true;
783   }
784   // Remangle our intrinsic since we upgrade the mangling
785   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
786   if (Result != None) {
787     NewFn = Result.getValue();
788     return true;
789   }
790 
791   //  This may not belong here. This function is effectively being overloaded
792   //  to both detect an intrinsic which needs upgrading, and to provide the
793   //  upgraded form of the intrinsic. We should perhaps have two separate
794   //  functions for this.
795   return false;
796 }
797 
798 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
799   NewFn = nullptr;
800   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
801   assert(F != NewFn && "Intrinsic function upgraded to the same function");
802 
803   // Upgrade intrinsic attributes.  This does not change the function.
804   if (NewFn)
805     F = NewFn;
806   if (Intrinsic::ID id = F->getIntrinsicID())
807     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
808   return Upgraded;
809 }
810 
811 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
812   // Nothing to do yet.
813   return false;
814 }
815 
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
// \p Op is the 64-bit-element vector operand; \p Shift is the immediate
// byte-shift amount.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  // The input has 64-bit elements (see the bitcast below), so the number of
  // bytes is 8x the element count.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (zeros, Op): indices < NumElts select the zero
        // vector, indices >= NumElts select data bytes from Op.
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
849 
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
// \p Op is the 64-bit-element vector operand; \p Shift is the immediate
// byte-shift amount.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  // The input has 64-bit elements (see the bitcast below), so the number of
  // bytes is 8x the element count.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Shuffle operands are (Op, zeros): indices < 16 within a lane select
        // data bytes from Op, anything shifted past the lane reads zeros.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
883 
// Convert an integer mask value into a vector of i1 with NumElts elements.
// The mask is bitcast to a <BitWidth x i1> vector and, when the vector being
// masked has fewer lanes than the mask has bits, truncated via shuffle.
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
                             cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    // Identity indices 0..NumElts-1 keep only the low mask bits.
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask,
                                       makeArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
903 
904 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
905                             Value *Op0, Value *Op1) {
906   // If the mask is all ones just emit the first operation.
907   if (const auto *C = dyn_cast<Constant>(Mask))
908     if (C->isAllOnesValue())
909       return Op0;
910 
911   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
912   return Builder.CreateSelect(Mask, Op0, Op1);
913 }
914 
915 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
916                                   Value *Op0, Value *Op1) {
917   // If the mask is all ones just emit the first operation.
918   if (const auto *C = dyn_cast<Constant>(Mask))
919     if (C->isAllOnesValue())
920       return Op0;
921 
922   llvm::VectorType *MaskTy =
923     llvm::VectorType::get(Builder.getInt1Ty(),
924                           Mask->getType()->getIntegerBitWidth());
925   Mask = Builder.CreateBitCast(Mask, MaskTy);
926   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
927   return Builder.CreateSelect(Mask, Op0, Op1);
928 }
929 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// \p Shift must be a constant immediate; \p Passthru and \p Mask implement the
// AVX-512 merge-masking via EmitX86Select.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      // Shuffle operands below are (Op1, Op0): indices >= 16 within a lane
      // cross over from the low source into the high source.
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Apply AVX-512 merge masking: keep Passthru lanes where Mask bits are 0.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
978 
979 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
980                                           bool ZeroMask, bool IndexForm) {
981   Type *Ty = CI.getType();
982   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
983   unsigned EltWidth = Ty->getScalarSizeInBits();
984   bool IsFloat = Ty->isFPOrFPVectorTy();
985   Intrinsic::ID IID;
986   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
987     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
988   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
989     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
990   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
991     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
992   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
993     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
994   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
995     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
996   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
997     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
998   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
999     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1000   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1001     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1002   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1003     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1004   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1005     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1006   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1007     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1008   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1009     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1010   else if (VecWidth == 128 && EltWidth == 16)
1011     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1012   else if (VecWidth == 256 && EltWidth == 16)
1013     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1014   else if (VecWidth == 512 && EltWidth == 16)
1015     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1016   else if (VecWidth == 128 && EltWidth == 8)
1017     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1018   else if (VecWidth == 256 && EltWidth == 8)
1019     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1020   else if (VecWidth == 512 && EltWidth == 8)
1021     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1022   else
1023     llvm_unreachable("Unexpected intrinsic");
1024 
1025   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1026                     CI.getArgOperand(2) };
1027 
1028   // If this isn't index form we need to swap operand 0 and 1.
1029   if (!IndexForm)
1030     std::swap(Args[0], Args[1]);
1031 
1032   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1033                                 Args);
1034   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1035                              : Builder.CreateBitCast(CI.getArgOperand(1),
1036                                                      Ty);
1037   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1038 }
1039 
1040 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1041                                             bool IsSigned, bool IsAddition) {
1042   Type *Ty = CI.getType();
1043   Value *Op0 = CI.getOperand(0);
1044   Value *Op1 = CI.getOperand(1);
1045 
1046   Intrinsic::ID IID =
1047       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1048                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1049   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1050   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1051 
1052   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1053     Value *VecSrc = CI.getOperand(2);
1054     Value *Mask = CI.getOperand(3);
1055     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1056   }
1057   return Res;
1058 }
1059 
1060 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1061                                bool IsRotateRight) {
1062   Type *Ty = CI.getType();
1063   Value *Src = CI.getArgOperand(0);
1064   Value *Amt = CI.getArgOperand(1);
1065 
1066   // Amount may be scalar immediate, in which case create a splat vector.
1067   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1068   // we only care about the lowest log2 bits anyway.
1069   if (Amt->getType() != Ty) {
1070     unsigned NumElts = Ty->getVectorNumElements();
1071     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1072     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1073   }
1074 
1075   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1076   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1077   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1078 
1079   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1080     Value *VecSrc = CI.getOperand(2);
1081     Value *Mask = CI.getOperand(3);
1082     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1083   }
1084   return Res;
1085 }
1086 
1087 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1088                               bool IsSigned) {
1089   Type *Ty = CI.getType();
1090   Value *LHS = CI.getArgOperand(0);
1091   Value *RHS = CI.getArgOperand(1);
1092 
1093   CmpInst::Predicate Pred;
1094   switch (Imm) {
1095   case 0x0:
1096     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1097     break;
1098   case 0x1:
1099     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1100     break;
1101   case 0x2:
1102     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1103     break;
1104   case 0x3:
1105     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1106     break;
1107   case 0x4:
1108     Pred = ICmpInst::ICMP_EQ;
1109     break;
1110   case 0x5:
1111     Pred = ICmpInst::ICMP_NE;
1112     break;
1113   case 0x6:
1114     return Constant::getNullValue(Ty); // FALSE
1115   case 0x7:
1116     return Constant::getAllOnesValue(Ty); // TRUE
1117   default:
1118     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1119   }
1120 
1121   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1122   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1123   return Ext;
1124 }
1125 
// Upgrade X86 concat-shift (VSHLD/VSHRD) intrinsics to llvm.fshl/llvm.fshr,
// applying the AVX-512 merge- or zero-masking when a mask operand is present.
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  // fshr takes its operands in the opposite order from the X86 intrinsic.
  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    // 5-arg forms carry an explicit pass-through in arg 3; 4-arg forms use a
    // zero vector (zero-masking) or arg 0 (merge-masking) as the pass-through.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1159 
// Upgrade an X86 masked-store intrinsic to a generic llvm.masked.store (or a
// plain store when the mask is constant all-ones). \p Aligned selects the
// full vector alignment used by the aligned store variants; otherwise the
// store is emitted with alignment 1.
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                 Value *Ptr, Value *Data, Value *Mask,
                                 bool Aligned) {
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr,
                              llvm::PointerType::getUnqual(Data->getType()));
  // Aligned variants assume the natural (full vector width) alignment.
  unsigned Align =
    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Data->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
}
1179 
// Upgrade an X86 masked-load intrinsic to a generic llvm.masked.load (or a
// plain load when the mask is constant all-ones). \p Passthru supplies the
// values of lanes whose mask bit is clear; \p Aligned selects the full
// vector alignment used by the aligned load variants.
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  // Aligned variants assume the natural (full vector width) alignment.
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
1199 
1200 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1201   Value *Op0 = CI.getArgOperand(0);
1202   llvm::Type *Ty = Op0->getType();
1203   Value *Zero = llvm::Constant::getNullValue(Ty);
1204   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1205   Value *Neg = Builder.CreateNeg(Op0);
1206   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1207 
1208   if (CI.getNumArgOperands() == 3)
1209     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1210 
1211   return Res;
1212 }
1213 
1214 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1215                                ICmpInst::Predicate Pred) {
1216   Value *Op0 = CI.getArgOperand(0);
1217   Value *Op1 = CI.getArgOperand(1);
1218   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1219   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1220 
1221   if (CI.getNumArgOperands() == 4)
1222     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1223 
1224   return Res;
1225 }
1226 
1227 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1228   Type *Ty = CI.getType();
1229 
1230   // Arguments have a vXi32 type so cast to vXi64.
1231   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1232   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1233 
1234   if (IsSigned) {
1235     // Shift left then arithmetic shift right.
1236     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1237     LHS = Builder.CreateShl(LHS, ShiftAmt);
1238     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1239     RHS = Builder.CreateShl(RHS, ShiftAmt);
1240     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1241   } else {
1242     // Clear the upper bits.
1243     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1244     LHS = Builder.CreateAnd(LHS, Mask);
1245     RHS = Builder.CreateAnd(RHS, Mask);
1246   }
1247 
1248   Value *Res = Builder.CreateMul(LHS, RHS);
1249 
1250   if (CI.getNumArgOperands() == 4)
1251     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1252 
1253   return Res;
1254 }
1255 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Used when lowering AVX-512 compare intrinsics that return an integer mask:
// ANDs the compare result with the incoming mask (when it isn't constant
// all-ones), widens to at least 8 lanes, and bitcasts to an integer.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    // Skip the AND for a constant all-ones mask; it would be a no-op.
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad up to 8 lanes by shuffling in zeros: the low NumElts indices pick
    // the real lanes, the rest pick lanes from the null second operand.
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1278 
// Upgrade an AVX-512 masked compare intrinsic to an icmp whose predicate is
// decoded from the condition code \p CC, followed by mask application.
// CC 3 and 7 encode constant-false and constant-true results respectively.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    // Always-false compare.
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Always-true compare.
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  // The mask is always the last operand of these intrinsics.
  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
1307 
1308 // Replace a masked intrinsic with an older unmasked intrinsic.
1309 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1310                                     Intrinsic::ID IID) {
1311   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1312   Value *Rep = Builder.CreateCall(Intrin,
1313                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1314   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1315 }
1316 
1317 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1318   Value* A = CI.getArgOperand(0);
1319   Value* B = CI.getArgOperand(1);
1320   Value* Src = CI.getArgOperand(2);
1321   Value* Mask = CI.getArgOperand(3);
1322 
1323   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1324   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1325   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1326   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1327   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1328   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1329 }
1330 
1331 
1332 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1333   Value* Op = CI.getArgOperand(0);
1334   Type* ReturnOp = CI.getType();
1335   unsigned NumElts = CI.getType()->getVectorNumElements();
1336   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1337   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1338 }
1339 
// Replace intrinsic with unmasked version and a select.
//
// Handles "llvm.x86.avx512.mask.*" intrinsics whose unmasked forms exist as
// separate intrinsics: the stripped name, combined with the result vector
// width (and, where relevant, element width), selects the replacement
// intrinsic ID. The replacement is called with all operands except the
// trailing passthru and mask, and the masking is then recreated with a
// select. Returns false (leaving Rep untouched) when the name is not one of
// the intrinsics handled here.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  // Widths of the full result vector and of one element, in bits; these
  // disambiguate the 128/256/512-bit and ps/pd (32/64-bit) variants below.
  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar has both FP and integer variants at the same widths, so the
    // result's FP-ness picks between the ps/pd and d/q intrinsics.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the element-type letter ('d' or 'q') just past "conflict.".
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with all operands except the trailing
  // passthru (second-to-last) and mask (last).
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  // Recreate the masking: select between the unmasked result and the
  // passthru operand under the mask operand.
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1564 
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Only rewrite strings that look like the ARC autorelease marker sequence:
  // they must begin with "mov\tfp" and mention the runtime call.
  if (AsmStr->find("mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;
  size_t MarkerPos = AsmStr->find("# marker");
  if (MarkerPos == std::string::npos)
    return;
  // Swap the legacy '#' comment character for ';'.
  AsmStr->replace(MarkerPos, 1, ";");
}
1576 
1577 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1578 /// provided to seamlessly integrate with existing context.
1579 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1580   Function *F = CI->getCalledFunction();
1581   LLVMContext &C = CI->getContext();
1582   IRBuilder<> Builder(C);
1583   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1584 
1585   assert(F && "Intrinsic call is not direct?");
1586 
1587   if (!NewFn) {
1588     // Get the Function's name.
1589     StringRef Name = F->getName();
1590 
1591     // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
1592     // from upgrader because the optimizer now only recognizes intrinsics for
1593     // ARC runtime calls.
1594     if (Name == "clang.arc.use") {
1595       CI->eraseFromParent();
1596       return;
1597     }
1598 
1599     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1600     Name = Name.substr(5);
1601 
1602     bool IsX86 = Name.startswith("x86.");
1603     if (IsX86)
1604       Name = Name.substr(4);
1605     bool IsNVVM = Name.startswith("nvvm.");
1606     if (IsNVVM)
1607       Name = Name.substr(5);
1608 
1609     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1610       Module *M = F->getParent();
1611       SmallVector<Metadata *, 1> Elts;
1612       Elts.push_back(
1613           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1614       MDNode *Node = MDNode::get(C, Elts);
1615 
1616       Value *Arg0 = CI->getArgOperand(0);
1617       Value *Arg1 = CI->getArgOperand(1);
1618 
1619       // Nontemporal (unaligned) store of the 0'th element of the float/double
1620       // vector.
1621       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1622       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1623       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1624       Value *Extract =
1625           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1626 
1627       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1628       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1629 
1630       // Remove intrinsic.
1631       CI->eraseFromParent();
1632       return;
1633     }
1634 
1635     if (IsX86 && (Name.startswith("avx.movnt.") ||
1636                   Name.startswith("avx512.storent."))) {
1637       Module *M = F->getParent();
1638       SmallVector<Metadata *, 1> Elts;
1639       Elts.push_back(
1640           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1641       MDNode *Node = MDNode::get(C, Elts);
1642 
1643       Value *Arg0 = CI->getArgOperand(0);
1644       Value *Arg1 = CI->getArgOperand(1);
1645 
1646       // Convert the type of the pointer to a pointer to the stored type.
1647       Value *BC = Builder.CreateBitCast(Arg0,
1648                                         PointerType::getUnqual(Arg1->getType()),
1649                                         "cast");
1650       VectorType *VTy = cast<VectorType>(Arg1->getType());
1651       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1652                                                  VTy->getBitWidth() / 8);
1653       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1654 
1655       // Remove intrinsic.
1656       CI->eraseFromParent();
1657       return;
1658     }
1659 
1660     if (IsX86 && Name == "sse2.storel.dq") {
1661       Value *Arg0 = CI->getArgOperand(0);
1662       Value *Arg1 = CI->getArgOperand(1);
1663 
1664       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1665       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1666       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1667       Value *BC = Builder.CreateBitCast(Arg0,
1668                                         PointerType::getUnqual(Elt->getType()),
1669                                         "cast");
1670       Builder.CreateAlignedStore(Elt, BC, 1);
1671 
1672       // Remove intrinsic.
1673       CI->eraseFromParent();
1674       return;
1675     }
1676 
1677     if (IsX86 && (Name.startswith("sse.storeu.") ||
1678                   Name.startswith("sse2.storeu.") ||
1679                   Name.startswith("avx.storeu."))) {
1680       Value *Arg0 = CI->getArgOperand(0);
1681       Value *Arg1 = CI->getArgOperand(1);
1682 
1683       Arg0 = Builder.CreateBitCast(Arg0,
1684                                    PointerType::getUnqual(Arg1->getType()),
1685                                    "cast");
1686       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1687 
1688       // Remove intrinsic.
1689       CI->eraseFromParent();
1690       return;
1691     }
1692 
1693     if (IsX86 && Name == "avx512.mask.store.ss") {
1694       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1695       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1696                          Mask, false);
1697 
1698       // Remove intrinsic.
1699       CI->eraseFromParent();
1700       return;
1701     }
1702 
1703     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1704       // "avx512.mask.storeu." or "avx512.mask.store."
1705       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1706       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1707                          CI->getArgOperand(2), Aligned);
1708 
1709       // Remove intrinsic.
1710       CI->eraseFromParent();
1711       return;
1712     }
1713 
1714     Value *Rep;
1715     // Upgrade packed integer vector compare intrinsics to compare instructions.
1716     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1717                   Name.startswith("avx2.pcmp"))) {
1718       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1719       bool CmpEq = Name[9] == 'e';
1720       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1721                                CI->getArgOperand(0), CI->getArgOperand(1));
1722       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1723     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1724       Type *ExtTy = Type::getInt32Ty(C);
1725       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1726         ExtTy = Type::getInt64Ty(C);
1727       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1728                          ExtTy->getPrimitiveSizeInBits();
1729       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1730       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1731     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1732                          Name == "sse2.sqrt.sd")) {
1733       Value *Vec = CI->getArgOperand(0);
1734       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1735       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1736                                                  Intrinsic::sqrt, Elt0->getType());
1737       Elt0 = Builder.CreateCall(Intr, Elt0);
1738       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1739     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1740                          Name.startswith("sse2.sqrt.p") ||
1741                          Name.startswith("sse.sqrt.p"))) {
1742       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1743                                                          Intrinsic::sqrt,
1744                                                          CI->getType()),
1745                                {CI->getArgOperand(0)});
1746     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1747       if (CI->getNumArgOperands() == 4 &&
1748           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1749            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1750         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1751                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1752 
1753         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1754         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1755                                                            IID), Args);
1756       } else {
1757         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1758                                                            Intrinsic::sqrt,
1759                                                            CI->getType()),
1760                                  {CI->getArgOperand(0)});
1761       }
1762       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1763                           CI->getArgOperand(1));
1764     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1765                          Name.startswith("avx512.ptestnm"))) {
1766       Value *Op0 = CI->getArgOperand(0);
1767       Value *Op1 = CI->getArgOperand(1);
1768       Value *Mask = CI->getArgOperand(2);
1769       Rep = Builder.CreateAnd(Op0, Op1);
1770       llvm::Type *Ty = Op0->getType();
1771       Value *Zero = llvm::Constant::getNullValue(Ty);
1772       ICmpInst::Predicate Pred =
1773         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1774       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1775       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1776     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1777       unsigned NumElts =
1778           CI->getArgOperand(1)->getType()->getVectorNumElements();
1779       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1780       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1781                           CI->getArgOperand(1));
1782     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1783       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1784       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1785       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1786       uint32_t Indices[64];
1787       for (unsigned i = 0; i != NumElts; ++i)
1788         Indices[i] = i;
1789 
1790       // First extract half of each vector. This gives better codegen than
1791       // doing it in a single shuffle.
1792       LHS = Builder.CreateShuffleVector(LHS, LHS,
1793                                         makeArrayRef(Indices, NumElts / 2));
1794       RHS = Builder.CreateShuffleVector(RHS, RHS,
1795                                         makeArrayRef(Indices, NumElts / 2));
1796       // Concat the vectors.
1797       // NOTE: Operands have to be swapped to match intrinsic definition.
1798       Rep = Builder.CreateShuffleVector(RHS, LHS,
1799                                         makeArrayRef(Indices, NumElts));
1800       Rep = Builder.CreateBitCast(Rep, CI->getType());
1801     } else if (IsX86 && Name == "avx512.kand.w") {
1802       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1803       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1804       Rep = Builder.CreateAnd(LHS, RHS);
1805       Rep = Builder.CreateBitCast(Rep, CI->getType());
1806     } else if (IsX86 && Name == "avx512.kandn.w") {
1807       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1808       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1809       LHS = Builder.CreateNot(LHS);
1810       Rep = Builder.CreateAnd(LHS, RHS);
1811       Rep = Builder.CreateBitCast(Rep, CI->getType());
1812     } else if (IsX86 && Name == "avx512.kor.w") {
1813       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1814       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1815       Rep = Builder.CreateOr(LHS, RHS);
1816       Rep = Builder.CreateBitCast(Rep, CI->getType());
1817     } else if (IsX86 && Name == "avx512.kxor.w") {
1818       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1819       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1820       Rep = Builder.CreateXor(LHS, RHS);
1821       Rep = Builder.CreateBitCast(Rep, CI->getType());
1822     } else if (IsX86 && Name == "avx512.kxnor.w") {
1823       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1824       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1825       LHS = Builder.CreateNot(LHS);
1826       Rep = Builder.CreateXor(LHS, RHS);
1827       Rep = Builder.CreateBitCast(Rep, CI->getType());
1828     } else if (IsX86 && Name == "avx512.knot.w") {
1829       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1830       Rep = Builder.CreateNot(Rep);
1831       Rep = Builder.CreateBitCast(Rep, CI->getType());
1832     } else if (IsX86 &&
1833                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1834       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1835       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1836       Rep = Builder.CreateOr(LHS, RHS);
1837       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1838       Value *C;
1839       if (Name[14] == 'c')
1840         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1841       else
1842         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1843       Rep = Builder.CreateICmpEQ(Rep, C);
1844       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1845     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1846                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1847                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1848                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1849       Type *I32Ty = Type::getInt32Ty(C);
1850       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1851                                                  ConstantInt::get(I32Ty, 0));
1852       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1853                                                  ConstantInt::get(I32Ty, 0));
1854       Value *EltOp;
1855       if (Name.contains(".add."))
1856         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1857       else if (Name.contains(".sub."))
1858         EltOp = Builder.CreateFSub(Elt0, Elt1);
1859       else if (Name.contains(".mul."))
1860         EltOp = Builder.CreateFMul(Elt0, Elt1);
1861       else
1862         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1863       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1864                                         ConstantInt::get(I32Ty, 0));
1865     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1866       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1867       bool CmpEq = Name[16] == 'e';
1868       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1869     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1870       Type *OpTy = CI->getArgOperand(0)->getType();
1871       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1872       Intrinsic::ID IID;
1873       switch (VecWidth) {
1874       default: llvm_unreachable("Unexpected intrinsic");
1875       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1876       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1877       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1878       }
1879 
1880       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1881                                { CI->getOperand(0), CI->getArgOperand(1) });
1882       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1883     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1884       Type *OpTy = CI->getArgOperand(0)->getType();
1885       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1886       unsigned EltWidth = OpTy->getScalarSizeInBits();
1887       Intrinsic::ID IID;
1888       if (VecWidth == 128 && EltWidth == 32)
1889         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1890       else if (VecWidth == 256 && EltWidth == 32)
1891         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1892       else if (VecWidth == 512 && EltWidth == 32)
1893         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1894       else if (VecWidth == 128 && EltWidth == 64)
1895         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1896       else if (VecWidth == 256 && EltWidth == 64)
1897         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1898       else if (VecWidth == 512 && EltWidth == 64)
1899         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1900       else
1901         llvm_unreachable("Unexpected intrinsic");
1902 
1903       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1904                                { CI->getOperand(0), CI->getArgOperand(1) });
1905       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1906     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1907       Type *OpTy = CI->getArgOperand(0)->getType();
1908       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1909       unsigned EltWidth = OpTy->getScalarSizeInBits();
1910       Intrinsic::ID IID;
1911       if (VecWidth == 128 && EltWidth == 32)
1912         IID = Intrinsic::x86_avx512_cmp_ps_128;
1913       else if (VecWidth == 256 && EltWidth == 32)
1914         IID = Intrinsic::x86_avx512_cmp_ps_256;
1915       else if (VecWidth == 512 && EltWidth == 32)
1916         IID = Intrinsic::x86_avx512_cmp_ps_512;
1917       else if (VecWidth == 128 && EltWidth == 64)
1918         IID = Intrinsic::x86_avx512_cmp_pd_128;
1919       else if (VecWidth == 256 && EltWidth == 64)
1920         IID = Intrinsic::x86_avx512_cmp_pd_256;
1921       else if (VecWidth == 512 && EltWidth == 64)
1922         IID = Intrinsic::x86_avx512_cmp_pd_512;
1923       else
1924         llvm_unreachable("Unexpected intrinsic");
1925 
1926       SmallVector<Value *, 4> Args;
1927       Args.push_back(CI->getArgOperand(0));
1928       Args.push_back(CI->getArgOperand(1));
1929       Args.push_back(CI->getArgOperand(2));
1930       if (CI->getNumArgOperands() == 5)
1931         Args.push_back(CI->getArgOperand(4));
1932 
1933       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1934                                Args);
1935       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1936     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1937                Name[16] != 'p') {
1938       // Integer compare intrinsics.
1939       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1940       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1941     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1942       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1943       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1944     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1945                          Name.startswith("avx512.cvtw2mask.") ||
1946                          Name.startswith("avx512.cvtd2mask.") ||
1947                          Name.startswith("avx512.cvtq2mask."))) {
1948       Value *Op = CI->getArgOperand(0);
1949       Value *Zero = llvm::Constant::getNullValue(Op->getType());
1950       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1951       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1952     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1953                         Name == "ssse3.pabs.w.128" ||
1954                         Name == "ssse3.pabs.d.128" ||
1955                         Name.startswith("avx2.pabs") ||
1956                         Name.startswith("avx512.mask.pabs"))) {
1957       Rep = upgradeAbs(Builder, *CI);
1958     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1959                          Name == "sse2.pmaxs.w" ||
1960                          Name == "sse41.pmaxsd" ||
1961                          Name.startswith("avx2.pmaxs") ||
1962                          Name.startswith("avx512.mask.pmaxs"))) {
1963       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1964     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1965                          Name == "sse41.pmaxuw" ||
1966                          Name == "sse41.pmaxud" ||
1967                          Name.startswith("avx2.pmaxu") ||
1968                          Name.startswith("avx512.mask.pmaxu"))) {
1969       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1970     } else if (IsX86 && (Name == "sse41.pminsb" ||
1971                          Name == "sse2.pmins.w" ||
1972                          Name == "sse41.pminsd" ||
1973                          Name.startswith("avx2.pmins") ||
1974                          Name.startswith("avx512.mask.pmins"))) {
1975       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1976     } else if (IsX86 && (Name == "sse2.pminu.b" ||
1977                          Name == "sse41.pminuw" ||
1978                          Name == "sse41.pminud" ||
1979                          Name.startswith("avx2.pminu") ||
1980                          Name.startswith("avx512.mask.pminu"))) {
1981       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1982     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1983                          Name == "avx2.pmulu.dq" ||
1984                          Name == "avx512.pmulu.dq.512" ||
1985                          Name.startswith("avx512.mask.pmulu.dq."))) {
1986       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1987     } else if (IsX86 && (Name == "sse41.pmuldq" ||
1988                          Name == "avx2.pmul.dq" ||
1989                          Name == "avx512.pmul.dq.512" ||
1990                          Name.startswith("avx512.mask.pmul.dq."))) {
1991       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1992     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1993                          Name == "sse2.cvtsi2sd" ||
1994                          Name == "sse.cvtsi642ss" ||
1995                          Name == "sse2.cvtsi642sd")) {
1996       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1997                                  CI->getType()->getVectorElementType());
1998       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1999     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2000       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2001                                  CI->getType()->getVectorElementType());
2002       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2003     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2004       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2005       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2006       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2007     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2008                          Name == "sse2.cvtdq2ps" ||
2009                          Name == "avx.cvtdq2.pd.256" ||
2010                          Name == "avx.cvtdq2.ps.256" ||
2011                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2012                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2013                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2014                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2015                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2016                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2017                          Name == "avx512.mask.cvtqq2ps.256" ||
2018                          Name == "avx512.mask.cvtqq2ps.512" ||
2019                          Name == "avx512.mask.cvtuqq2ps.256" ||
2020                          Name == "avx512.mask.cvtuqq2ps.512" ||
2021                          Name == "sse2.cvtps2pd" ||
2022                          Name == "avx.cvt.ps2.pd.256" ||
2023                          Name == "avx512.mask.cvtps2pd.128" ||
2024                          Name == "avx512.mask.cvtps2pd.256")) {
2025       Type *DstTy = CI->getType();
2026       Rep = CI->getArgOperand(0);
2027       Type *SrcTy = Rep->getType();
2028 
2029       unsigned NumDstElts = DstTy->getVectorNumElements();
2030       if (NumDstElts < SrcTy->getVectorNumElements()) {
2031         assert(NumDstElts == 2 && "Unexpected vector size");
2032         uint32_t ShuffleMask[2] = { 0, 1 };
2033         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2034       }
2035 
2036       bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2037       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2038       if (IsPS2PD)
2039         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2040       else if (CI->getNumArgOperands() == 4 &&
2041                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2042                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2043         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2044                                        : Intrinsic::x86_avx512_sitofp_round;
2045         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2046                                                 { DstTy, SrcTy });
2047         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2048       } else {
2049         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2050                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2051       }
2052 
2053       if (CI->getNumArgOperands() >= 3)
2054         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2055                             CI->getArgOperand(1));
2056     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2057       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2058                               CI->getArgOperand(1), CI->getArgOperand(2),
2059                               /*Aligned*/false);
2060     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2061       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2062                               CI->getArgOperand(1),CI->getArgOperand(2),
2063                               /*Aligned*/true);
2064     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2065       Type *ResultTy = CI->getType();
2066       Type *PtrTy = ResultTy->getVectorElementType();
2067 
2068       // Cast the pointer to element type.
2069       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2070                                          llvm::PointerType::getUnqual(PtrTy));
2071 
2072       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2073                                      ResultTy->getVectorNumElements());
2074 
2075       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2076                                                 Intrinsic::masked_expandload,
2077                                                 ResultTy);
2078       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2079     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2080       Type *ResultTy = CI->getArgOperand(1)->getType();
2081       Type *PtrTy = ResultTy->getVectorElementType();
2082 
2083       // Cast the pointer to element type.
2084       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2085                                          llvm::PointerType::getUnqual(PtrTy));
2086 
2087       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2088                                      ResultTy->getVectorNumElements());
2089 
2090       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2091                                                 Intrinsic::masked_compressstore,
2092                                                 ResultTy);
2093       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2094     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2095                          Name.startswith("avx512.mask.expand."))) {
2096       Type *ResultTy = CI->getType();
2097 
2098       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2099                                      ResultTy->getVectorNumElements());
2100 
2101       bool IsCompress = Name[12] == 'c';
2102       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2103                                      : Intrinsic::x86_avx512_mask_expand;
2104       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2105       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2106                                        MaskVec });
2107     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2108       bool IsSigned;
2109       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2110           Name.endswith("uq"))
2111         IsSigned = false;
2112       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2113                Name.endswith("q"))
2114         IsSigned = true;
2115       else
2116         llvm_unreachable("Unknown suffix");
2117 
2118       unsigned Imm;
2119       if (CI->getNumArgOperands() == 3) {
2120         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2121       } else {
2122         Name = Name.substr(9); // strip off "xop.vpcom"
2123         if (Name.startswith("lt"))
2124           Imm = 0;
2125         else if (Name.startswith("le"))
2126           Imm = 1;
2127         else if (Name.startswith("gt"))
2128           Imm = 2;
2129         else if (Name.startswith("ge"))
2130           Imm = 3;
2131         else if (Name.startswith("eq"))
2132           Imm = 4;
2133         else if (Name.startswith("ne"))
2134           Imm = 5;
2135         else if (Name.startswith("false"))
2136           Imm = 6;
2137         else if (Name.startswith("true"))
2138           Imm = 7;
2139         else
2140           llvm_unreachable("Unknown condition");
2141       }
2142 
2143       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2144     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2145       Value *Sel = CI->getArgOperand(2);
2146       Value *NotSel = Builder.CreateNot(Sel);
2147       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2148       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2149       Rep = Builder.CreateOr(Sel0, Sel1);
2150     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2151                          Name.startswith("avx512.prol") ||
2152                          Name.startswith("avx512.mask.prol"))) {
2153       Rep = upgradeX86Rotate(Builder, *CI, false);
2154     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2155                          Name.startswith("avx512.mask.pror"))) {
2156       Rep = upgradeX86Rotate(Builder, *CI, true);
2157     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2158                          Name.startswith("avx512.mask.vpshld") ||
2159                          Name.startswith("avx512.maskz.vpshld"))) {
2160       bool ZeroMask = Name[11] == 'z';
2161       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2162     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2163                          Name.startswith("avx512.mask.vpshrd") ||
2164                          Name.startswith("avx512.maskz.vpshrd"))) {
2165       bool ZeroMask = Name[11] == 'z';
2166       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2167     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2168       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2169                                                Intrinsic::x86_sse42_crc32_32_8);
2170       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2171       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2172       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2173     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2174                          Name.startswith("avx512.vbroadcast.s"))) {
2175       // Replace broadcasts with a series of insertelements.
2176       Type *VecTy = CI->getType();
2177       Type *EltTy = VecTy->getVectorElementType();
2178       unsigned EltNum = VecTy->getVectorNumElements();
2179       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2180                                           EltTy->getPointerTo());
2181       Value *Load = Builder.CreateLoad(EltTy, Cast);
2182       Type *I32Ty = Type::getInt32Ty(C);
2183       Rep = UndefValue::get(VecTy);
2184       for (unsigned I = 0; I < EltNum; ++I)
2185         Rep = Builder.CreateInsertElement(Rep, Load,
2186                                           ConstantInt::get(I32Ty, I));
2187     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2188                          Name.startswith("sse41.pmovzx") ||
2189                          Name.startswith("avx2.pmovsx") ||
2190                          Name.startswith("avx2.pmovzx") ||
2191                          Name.startswith("avx512.mask.pmovsx") ||
2192                          Name.startswith("avx512.mask.pmovzx"))) {
2193       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2194       VectorType *DstTy = cast<VectorType>(CI->getType());
2195       unsigned NumDstElts = DstTy->getNumElements();
2196 
2197       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2198       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2199       for (unsigned i = 0; i != NumDstElts; ++i)
2200         ShuffleMask[i] = i;
2201 
2202       Value *SV = Builder.CreateShuffleVector(
2203           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2204 
2205       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2206       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2207                    : Builder.CreateZExt(SV, DstTy);
2208       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2209       if (CI->getNumArgOperands() == 3)
2210         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2211                             CI->getArgOperand(1));
2212     } else if (Name == "avx512.mask.pmov.qd.256" ||
2213                Name == "avx512.mask.pmov.qd.512" ||
2214                Name == "avx512.mask.pmov.wb.256" ||
2215                Name == "avx512.mask.pmov.wb.512") {
2216       Type *Ty = CI->getArgOperand(1)->getType();
2217       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2218       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2219                           CI->getArgOperand(1));
2220     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2221                          Name == "avx2.vbroadcasti128")) {
2222       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2223       Type *EltTy = CI->getType()->getVectorElementType();
2224       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2225       Type *VT = VectorType::get(EltTy, NumSrcElts);
2226       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2227                                             PointerType::getUnqual(VT));
2228       Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
2229       if (NumSrcElts == 2)
2230         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2231                                           { 0, 1, 0, 1 });
2232       else
2233         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2234                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2235     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2236                          Name.startswith("avx512.mask.shuf.f"))) {
2237       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2238       Type *VT = CI->getType();
2239       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2240       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2241       unsigned ControlBitsMask = NumLanes - 1;
2242       unsigned NumControlBits = NumLanes / 2;
2243       SmallVector<uint32_t, 8> ShuffleMask(0);
2244 
2245       for (unsigned l = 0; l != NumLanes; ++l) {
2246         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2247         // We actually need the other source.
2248         if (l >= NumLanes / 2)
2249           LaneMask += NumLanes;
2250         for (unsigned i = 0; i != NumElementsInLane; ++i)
2251           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2252       }
2253       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2254                                         CI->getArgOperand(1), ShuffleMask);
2255       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2256                           CI->getArgOperand(3));
2257     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2258                          Name.startswith("avx512.mask.broadcasti"))) {
2259       unsigned NumSrcElts =
2260                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2261       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2262 
2263       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2264       for (unsigned i = 0; i != NumDstElts; ++i)
2265         ShuffleMask[i] = i % NumSrcElts;
2266 
2267       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2268                                         CI->getArgOperand(0),
2269                                         ShuffleMask);
2270       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2271                           CI->getArgOperand(1));
2272     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2273                          Name.startswith("avx2.vbroadcast") ||
2274                          Name.startswith("avx512.pbroadcast") ||
2275                          Name.startswith("avx512.mask.broadcast.s"))) {
2276       // Replace vp?broadcasts with a vector shuffle.
2277       Value *Op = CI->getArgOperand(0);
2278       unsigned NumElts = CI->getType()->getVectorNumElements();
2279       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2280       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2281                                         Constant::getNullValue(MaskTy));
2282 
2283       if (CI->getNumArgOperands() == 3)
2284         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2285                             CI->getArgOperand(1));
2286     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2287                          Name.startswith("sse2.psubs.") ||
2288                          Name.startswith("avx2.padds.") ||
2289                          Name.startswith("avx2.psubs.") ||
2290                          Name.startswith("avx512.padds.") ||
2291                          Name.startswith("avx512.psubs.") ||
2292                          Name.startswith("avx512.mask.padds.") ||
2293                          Name.startswith("avx512.mask.psubs."))) {
2294       bool IsAdd = Name.contains(".padds");
2295       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2296     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2297                          Name.startswith("sse2.psubus.") ||
2298                          Name.startswith("avx2.paddus.") ||
2299                          Name.startswith("avx2.psubus.") ||
2300                          Name.startswith("avx512.mask.paddus.") ||
2301                          Name.startswith("avx512.mask.psubus."))) {
2302       bool IsAdd = Name.contains(".paddus");
2303       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2304     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2305       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2306                                       CI->getArgOperand(1),
2307                                       CI->getArgOperand(2),
2308                                       CI->getArgOperand(3),
2309                                       CI->getArgOperand(4),
2310                                       false);
2311     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2312       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2313                                       CI->getArgOperand(1),
2314                                       CI->getArgOperand(2),
2315                                       CI->getArgOperand(3),
2316                                       CI->getArgOperand(4),
2317                                       true);
2318     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2319                          Name == "avx2.psll.dq")) {
2320       // 128/256-bit shift left specified in bits.
2321       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2322       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2323                                        Shift / 8); // Shift is in bits.
2324     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2325                          Name == "avx2.psrl.dq")) {
2326       // 128/256-bit shift right specified in bits.
2327       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2328       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2329                                        Shift / 8); // Shift is in bits.
2330     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2331                          Name == "avx2.psll.dq.bs" ||
2332                          Name == "avx512.psll.dq.512")) {
2333       // 128/256/512-bit shift left specified in bytes.
2334       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2335       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2336     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2337                          Name == "avx2.psrl.dq.bs" ||
2338                          Name == "avx512.psrl.dq.512")) {
2339       // 128/256/512-bit shift right specified in bytes.
2340       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2341       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2342     } else if (IsX86 && (Name == "sse41.pblendw" ||
2343                          Name.startswith("sse41.blendp") ||
2344                          Name.startswith("avx.blend.p") ||
2345                          Name == "avx2.pblendw" ||
2346                          Name.startswith("avx2.pblendd."))) {
2347       Value *Op0 = CI->getArgOperand(0);
2348       Value *Op1 = CI->getArgOperand(1);
2349       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2350       VectorType *VecTy = cast<VectorType>(CI->getType());
2351       unsigned NumElts = VecTy->getNumElements();
2352 
2353       SmallVector<uint32_t, 16> Idxs(NumElts);
2354       for (unsigned i = 0; i != NumElts; ++i)
2355         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2356 
2357       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2358     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2359                          Name == "avx2.vinserti128" ||
2360                          Name.startswith("avx512.mask.insert"))) {
2361       Value *Op0 = CI->getArgOperand(0);
2362       Value *Op1 = CI->getArgOperand(1);
2363       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2364       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2365       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2366       unsigned Scale = DstNumElts / SrcNumElts;
2367 
2368       // Mask off the high bits of the immediate value; hardware ignores those.
2369       Imm = Imm % Scale;
2370 
2371       // Extend the second operand into a vector the size of the destination.
2372       Value *UndefV = UndefValue::get(Op1->getType());
2373       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2374       for (unsigned i = 0; i != SrcNumElts; ++i)
2375         Idxs[i] = i;
2376       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2377         Idxs[i] = SrcNumElts;
2378       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2379 
2380       // Insert the second operand into the first operand.
2381 
2382       // Note that there is no guarantee that instruction lowering will actually
2383       // produce a vinsertf128 instruction for the created shuffles. In
2384       // particular, the 0 immediate case involves no lane changes, so it can
2385       // be handled as a blend.
2386 
2387       // Example of shuffle mask for 32-bit elements:
2388       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2389       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2390 
      // First fill with identity mask.
2392       for (unsigned i = 0; i != DstNumElts; ++i)
2393         Idxs[i] = i;
2394       // Then replace the elements where we need to insert.
2395       for (unsigned i = 0; i != SrcNumElts; ++i)
2396         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2397       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2398 
2399       // If the intrinsic has a mask operand, handle that.
2400       if (CI->getNumArgOperands() == 5)
2401         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2402                             CI->getArgOperand(3));
2403     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2404                          Name == "avx2.vextracti128" ||
2405                          Name.startswith("avx512.mask.vextract"))) {
2406       Value *Op0 = CI->getArgOperand(0);
2407       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2408       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2409       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2410       unsigned Scale = SrcNumElts / DstNumElts;
2411 
2412       // Mask off the high bits of the immediate value; hardware ignores those.
2413       Imm = Imm % Scale;
2414 
2415       // Get indexes for the subvector of the input vector.
2416       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2417       for (unsigned i = 0; i != DstNumElts; ++i) {
2418         Idxs[i] = i + (Imm * DstNumElts);
2419       }
2420       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2421 
2422       // If the intrinsic has a mask operand, handle that.
2423       if (CI->getNumArgOperands() == 4)
2424         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2425                             CI->getArgOperand(2));
2426     } else if (!IsX86 && Name == "stackprotectorcheck") {
2427       Rep = nullptr;
2428     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2429                          Name.startswith("avx512.mask.perm.di."))) {
2430       Value *Op0 = CI->getArgOperand(0);
2431       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2432       VectorType *VecTy = cast<VectorType>(CI->getType());
2433       unsigned NumElts = VecTy->getNumElements();
2434 
2435       SmallVector<uint32_t, 8> Idxs(NumElts);
2436       for (unsigned i = 0; i != NumElts; ++i)
2437         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2438 
2439       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2440 
2441       if (CI->getNumArgOperands() == 4)
2442         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2443                             CI->getArgOperand(2));
2444     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2445                          Name == "avx2.vperm2i128")) {
2446       // The immediate permute control byte looks like this:
2447       //    [1:0] - select 128 bits from sources for low half of destination
2448       //    [2]   - ignore
2449       //    [3]   - zero low half of destination
2450       //    [5:4] - select 128 bits from sources for high half of destination
2451       //    [6]   - ignore
2452       //    [7]   - zero high half of destination
2453 
2454       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2455 
2456       unsigned NumElts = CI->getType()->getVectorNumElements();
2457       unsigned HalfSize = NumElts / 2;
2458       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2459 
2460       // Determine which operand(s) are actually in use for this instruction.
2461       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2462       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2463 
2464       // If needed, replace operands based on zero mask.
2465       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2466       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2467 
2468       // Permute low half of result.
2469       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2470       for (unsigned i = 0; i < HalfSize; ++i)
2471         ShuffleMask[i] = StartIndex + i;
2472 
2473       // Permute high half of result.
2474       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2475       for (unsigned i = 0; i < HalfSize; ++i)
2476         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2477 
2478       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2479 
2480     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2481                          Name == "sse2.pshuf.d" ||
2482                          Name.startswith("avx512.mask.vpermil.p") ||
2483                          Name.startswith("avx512.mask.pshuf.d."))) {
2484       Value *Op0 = CI->getArgOperand(0);
2485       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2486       VectorType *VecTy = cast<VectorType>(CI->getType());
2487       unsigned NumElts = VecTy->getNumElements();
2488       // Calculate the size of each index in the immediate.
2489       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2490       unsigned IdxMask = ((1 << IdxSize) - 1);
2491 
2492       SmallVector<uint32_t, 8> Idxs(NumElts);
2493       // Lookup the bits for this element, wrapping around the immediate every
2494       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2495       // to offset by the first index of each group.
2496       for (unsigned i = 0; i != NumElts; ++i)
2497         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2498 
2499       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2500 
2501       if (CI->getNumArgOperands() == 4)
2502         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2503                             CI->getArgOperand(2));
2504     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2505                          Name.startswith("avx512.mask.pshufl.w."))) {
2506       Value *Op0 = CI->getArgOperand(0);
2507       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2508       unsigned NumElts = CI->getType()->getVectorNumElements();
2509 
2510       SmallVector<uint32_t, 16> Idxs(NumElts);
2511       for (unsigned l = 0; l != NumElts; l += 8) {
2512         for (unsigned i = 0; i != 4; ++i)
2513           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2514         for (unsigned i = 4; i != 8; ++i)
2515           Idxs[i + l] = i + l;
2516       }
2517 
2518       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2519 
2520       if (CI->getNumArgOperands() == 4)
2521         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2522                             CI->getArgOperand(2));
2523     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2524                          Name.startswith("avx512.mask.pshufh.w."))) {
2525       Value *Op0 = CI->getArgOperand(0);
2526       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2527       unsigned NumElts = CI->getType()->getVectorNumElements();
2528 
2529       SmallVector<uint32_t, 16> Idxs(NumElts);
2530       for (unsigned l = 0; l != NumElts; l += 8) {
2531         for (unsigned i = 0; i != 4; ++i)
2532           Idxs[i + l] = i + l;
2533         for (unsigned i = 0; i != 4; ++i)
2534           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2535       }
2536 
2537       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2538 
2539       if (CI->getNumArgOperands() == 4)
2540         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2541                             CI->getArgOperand(2));
2542     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2543       Value *Op0 = CI->getArgOperand(0);
2544       Value *Op1 = CI->getArgOperand(1);
2545       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2546       unsigned NumElts = CI->getType()->getVectorNumElements();
2547 
2548       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2549       unsigned HalfLaneElts = NumLaneElts / 2;
2550 
2551       SmallVector<uint32_t, 16> Idxs(NumElts);
2552       for (unsigned i = 0; i != NumElts; ++i) {
2553         // Base index is the starting element of the lane.
2554         Idxs[i] = i - (i % NumLaneElts);
2555         // If we are half way through the lane switch to the other source.
2556         if ((i % NumLaneElts) >= HalfLaneElts)
2557           Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
2560         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2561       }
2562 
2563       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2564 
2565       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2566                           CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      // MOVDDUP/MOVSHDUP/MOVSLDUP: within each 128-bit lane, duplicate every
      // even (Offset 0) or odd (Offset 1) element into an adjacent pair.
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      // movshdup duplicates the odd elements; the other two the even ones.
      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      // PUNPCKL/UNPCKL: interleave the low halves of each 128-bit lane of
      // Op0 and Op1 (even result elements from Op0, odd from Op1).
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      // PUNPCKH/UNPCKH: same as above but interleaving the high halves of
      // each 128-bit lane (note the extra NumLaneElts / 2 base offset).
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      // Masked AND. FP vector types have no bitwise ops, so bitcast to the
      // same-width integer vector, operate, and bitcast back.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      // Masked ANDN: ~Op0 & Op1, computed on the integer vector type.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      // Masked OR, via the integer vector type.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      // Masked XOR, via the integer vector type.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      // Masked integer add/sub/mul lower to plain IR arithmetic followed by
      // a mask-select against the passthru operand.
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      // Masked FP add. The 512-bit forms keep an x86 intrinsic that carries
      // operand 4 through (presumably the rounding mode — TODO confirm);
      // narrower forms become a plain fadd. All end with a mask-select.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        // Name[17] is 's' for ".ps", otherwise ".pd".
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      // Masked FP div; same structure as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      // Masked FP mul; same structure as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      // Masked FP sub; same structure as the add case above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      // Masked 512-bit FP min/max: pick the intrinsic from a 2x2 table
      // indexed [IsMin][IsDouble], both decoded from fixed name positions.
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      // Masked LZCNT lowers to generic llvm.ctlz with is_zero_undef=false.
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      // Masked shift-left-logical. The exact operation is decoded from the
      // characters that follow "avx512.mask.psll" (index 16 onward): 'i'
      // marks an immediate shift count, 'v' a per-element variable count,
      // and Size is the element-type letter just after the next '.'.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Variable-count forms with an element count in the name.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      // Masked logical shift-right; name decoding works exactly as in the
      // psll branch above.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Variable-count forms with an element count in the name.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      // Masked arithmetic shift-right; name decoding works exactly as in
      // the psll branch above.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Variable-count forms with an element count in the name.
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      // Masked scalar move: delegated to a helper.
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      // Mask-to-integer-vector conversion: delegated to a helper.
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      // MOVNTDQA becomes a naturally aligned vector load tagged with
      // !nontemporal metadata.
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
    } else if (IsX86 &&
               (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
                Name.startswith("avx512.mask.pavg"))) {
      // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
      // llvm.x86.avx512.mask.pavg.b/w
      // Rounding average: (zext(A) + zext(B) + 1) >> 1, computed in a wider
      // element type so the intermediate sum cannot overflow.
      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      VectorType *ZextType = VectorType::getExtendedElementVectorType(
          cast<VectorType>(A->getType()));
      Value *ExtendedA = Builder.CreateZExt(A, ZextType);
      Value *ExtendedB = Builder.CreateZExt(B, ZextType);
      Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
      Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
      Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
      Rep = Builder.CreateTrunc(ShiftR, A->getType());
      // Masked variant carries passthru (arg 2) and mask (arg 3) operands.
      if (CI->getNumArgOperands() > 2) {
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
      }
    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                         Name.startswith("fma.vfmsub.") ||
                         Name.startswith("fma.vfnmadd.") ||
                         Name.startswith("fma.vfnmsub."))) {
      // FMA3 family: decode negated-multiply ('n'), subtract ('s') and
      // scalar-ness from fixed name positions, then lower to llvm.fma with
      // the appropriate operands negated.
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      // Scalar forms operate only on element 0.
      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      // Scalar result goes back into element 0 of the first source vector.
      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
      // FMA4 scalar fmadd: like the scalar case above, but upper elements of
      // the result are zeroed instead of taken from the first source.
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      // Masked scalar FMA. mask3 takes the passthru from operand 2, maskz
      // zeroes it, and plain mask takes it from operand 0.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Negate whichever operand keeps the passthru operand un-negated.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      // If operand 4 is a non-constant or a constant other than 4, keep an
      // x86-specific intrinsic that carries it through (presumably the
      // rounding mode, with 4 as the default — TODO confirm); otherwise the
      // generic llvm.fma suffices.
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
      // Masked packed FMA, decoded the same way as the scalar forms above.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Negate whichever operand keeps the passthru operand un-negated.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      // 5-operand (512-bit) forms with operand 4 != 4 keep an x86 intrinsic
      // that preserves that operand; everything else becomes llvm.fma.
      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3150     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3151                          Name.startswith("fma.vfmsubadd.p"))) {
3152       bool IsSubAdd = Name[7] == 's';
3153       int NumElts = CI->getType()->getVectorNumElements();
3154 
3155       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3156                        CI->getArgOperand(2) };
3157 
3158       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3159                                                 Ops[0]->getType());
3160       Value *Odd = Builder.CreateCall(FMA, Ops);
3161       Ops[2] = Builder.CreateFNeg(Ops[2]);
3162       Value *Even = Builder.CreateCall(FMA, Ops);
3163 
3164       if (IsSubAdd)
3165         std::swap(Even, Odd);
3166 
3167       SmallVector<uint32_t, 32> Idxs(NumElts);
3168       for (int i = 0; i != NumElts; ++i)
3169         Idxs[i] = i + (i % 2) * NumElts;
3170 
3171       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3172     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3173                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3174                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3175                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3176       bool IsMask3 = Name[11] == '3';
3177       bool IsMaskZ = Name[11] == 'z';
3178       // Drop the "avx512.mask." to make it easier.
3179       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3180       bool IsSubAdd = Name[3] == 's';
3181       if (CI->getNumArgOperands() == 5 &&
3182           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3183            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3184         Intrinsic::ID IID;
3185         // Check the character before ".512" in string.
3186         if (Name[Name.size()-5] == 's')
3187           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3188         else
3189           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3190 
3191         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3192                          CI->getArgOperand(2), CI->getArgOperand(4) };
3193         if (IsSubAdd)
3194           Ops[2] = Builder.CreateFNeg(Ops[2]);
3195 
3196         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3197                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3198                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3199       } else {
3200         int NumElts = CI->getType()->getVectorNumElements();
3201 
3202         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3203                          CI->getArgOperand(2) };
3204 
3205         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3206                                                   Ops[0]->getType());
3207         Value *Odd = Builder.CreateCall(FMA, Ops);
3208         Ops[2] = Builder.CreateFNeg(Ops[2]);
3209         Value *Even = Builder.CreateCall(FMA, Ops);
3210 
3211         if (IsSubAdd)
3212           std::swap(Even, Odd);
3213 
3214         SmallVector<uint32_t, 32> Idxs(NumElts);
3215         for (int i = 0; i != NumElts; ++i)
3216           Idxs[i] = i + (i % 2) * NumElts;
3217 
3218         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3219       }
3220 
3221       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3222                         IsMask3 ? CI->getArgOperand(2) :
3223                                   CI->getArgOperand(0);
3224 
3225       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3226     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3227                          Name.startswith("avx512.maskz.pternlog."))) {
3228       bool ZeroMask = Name[11] == 'z';
3229       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3230       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3231       Intrinsic::ID IID;
3232       if (VecWidth == 128 && EltWidth == 32)
3233         IID = Intrinsic::x86_avx512_pternlog_d_128;
3234       else if (VecWidth == 256 && EltWidth == 32)
3235         IID = Intrinsic::x86_avx512_pternlog_d_256;
3236       else if (VecWidth == 512 && EltWidth == 32)
3237         IID = Intrinsic::x86_avx512_pternlog_d_512;
3238       else if (VecWidth == 128 && EltWidth == 64)
3239         IID = Intrinsic::x86_avx512_pternlog_q_128;
3240       else if (VecWidth == 256 && EltWidth == 64)
3241         IID = Intrinsic::x86_avx512_pternlog_q_256;
3242       else if (VecWidth == 512 && EltWidth == 64)
3243         IID = Intrinsic::x86_avx512_pternlog_q_512;
3244       else
3245         llvm_unreachable("Unexpected intrinsic");
3246 
3247       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3248                         CI->getArgOperand(2), CI->getArgOperand(3) };
3249       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3250                                Args);
3251       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3252                                  : CI->getArgOperand(0);
3253       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3254     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3255                          Name.startswith("avx512.maskz.vpmadd52"))) {
3256       bool ZeroMask = Name[11] == 'z';
3257       bool High = Name[20] == 'h' || Name[21] == 'h';
3258       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3259       Intrinsic::ID IID;
3260       if (VecWidth == 128 && !High)
3261         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3262       else if (VecWidth == 256 && !High)
3263         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3264       else if (VecWidth == 512 && !High)
3265         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3266       else if (VecWidth == 128 && High)
3267         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3268       else if (VecWidth == 256 && High)
3269         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3270       else if (VecWidth == 512 && High)
3271         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3272       else
3273         llvm_unreachable("Unexpected intrinsic");
3274 
3275       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3276                         CI->getArgOperand(2) };
3277       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3278                                Args);
3279       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3280                                  : CI->getArgOperand(0);
3281       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3282     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3283                          Name.startswith("avx512.mask.vpermt2var.") ||
3284                          Name.startswith("avx512.maskz.vpermt2var."))) {
3285       bool ZeroMask = Name[11] == 'z';
3286       bool IndexForm = Name[17] == 'i';
3287       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3288     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3289                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3290                          Name.startswith("avx512.mask.vpdpbusds.") ||
3291                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3292       bool ZeroMask = Name[11] == 'z';
3293       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3294       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3295       Intrinsic::ID IID;
3296       if (VecWidth == 128 && !IsSaturating)
3297         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3298       else if (VecWidth == 256 && !IsSaturating)
3299         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3300       else if (VecWidth == 512 && !IsSaturating)
3301         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3302       else if (VecWidth == 128 && IsSaturating)
3303         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3304       else if (VecWidth == 256 && IsSaturating)
3305         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3306       else if (VecWidth == 512 && IsSaturating)
3307         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3308       else
3309         llvm_unreachable("Unexpected intrinsic");
3310 
3311       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3312                         CI->getArgOperand(2)  };
3313       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3314                                Args);
3315       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3316                                  : CI->getArgOperand(0);
3317       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3318     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3319                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3320                          Name.startswith("avx512.mask.vpdpwssds.") ||
3321                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3322       bool ZeroMask = Name[11] == 'z';
3323       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3324       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3325       Intrinsic::ID IID;
3326       if (VecWidth == 128 && !IsSaturating)
3327         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3328       else if (VecWidth == 256 && !IsSaturating)
3329         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3330       else if (VecWidth == 512 && !IsSaturating)
3331         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3332       else if (VecWidth == 128 && IsSaturating)
3333         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3334       else if (VecWidth == 256 && IsSaturating)
3335         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3336       else if (VecWidth == 512 && IsSaturating)
3337         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3338       else
3339         llvm_unreachable("Unexpected intrinsic");
3340 
3341       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3342                         CI->getArgOperand(2)  };
3343       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3344                                Args);
3345       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3346                                  : CI->getArgOperand(0);
3347       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3348     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3349                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3350                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3351       Intrinsic::ID IID;
3352       if (Name[0] == 'a' && Name.back() == '2')
3353         IID = Intrinsic::x86_addcarry_32;
3354       else if (Name[0] == 'a' && Name.back() == '4')
3355         IID = Intrinsic::x86_addcarry_64;
3356       else if (Name[0] == 's' && Name.back() == '2')
3357         IID = Intrinsic::x86_subborrow_32;
3358       else if (Name[0] == 's' && Name.back() == '4')
3359         IID = Intrinsic::x86_subborrow_64;
3360       else
3361         llvm_unreachable("Unexpected intrinsic");
3362 
3363       // Make a call with 3 operands.
3364       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3365                         CI->getArgOperand(2)};
3366       Value *NewCall = Builder.CreateCall(
3367                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3368                                 Args);
3369 
3370       // Extract the second result and store it.
3371       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3372       // Cast the pointer to the right type.
3373       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3374                                  llvm::PointerType::getUnqual(Data->getType()));
3375       Builder.CreateAlignedStore(Data, Ptr, 1);
3376       // Replace the original call result with the first result of the new call.
3377       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3378 
3379       CI->replaceAllUsesWith(CF);
3380       Rep = nullptr;
3381     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3382                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3383       // Rep will be updated by the call in the condition.
3384     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3385       Value *Arg = CI->getArgOperand(0);
3386       Value *Neg = Builder.CreateNeg(Arg, "neg");
3387       Value *Cmp = Builder.CreateICmpSGE(
3388           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3389       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3390     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3391                           Name == "max.ui" || Name == "max.ull")) {
3392       Value *Arg0 = CI->getArgOperand(0);
3393       Value *Arg1 = CI->getArgOperand(1);
3394       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3395                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3396                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3397       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3398     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3399                           Name == "min.ui" || Name == "min.ull")) {
3400       Value *Arg0 = CI->getArgOperand(0);
3401       Value *Arg1 = CI->getArgOperand(1);
3402       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3403                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3404                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3405       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3406     } else if (IsNVVM && Name == "clz.ll") {
3407       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3408       Value *Arg = CI->getArgOperand(0);
3409       Value *Ctlz = Builder.CreateCall(
3410           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3411                                     {Arg->getType()}),
3412           {Arg, Builder.getFalse()}, "ctlz");
3413       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3414     } else if (IsNVVM && Name == "popc.ll") {
3415       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3416       // i64.
3417       Value *Arg = CI->getArgOperand(0);
3418       Value *Popc = Builder.CreateCall(
3419           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3420                                     {Arg->getType()}),
3421           Arg, "ctpop");
3422       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3423     } else if (IsNVVM && Name == "h2f") {
3424       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3425                                    F->getParent(), Intrinsic::convert_from_fp16,
3426                                    {Builder.getFloatTy()}),
3427                                CI->getArgOperand(0), "h2f");
3428     } else {
3429       llvm_unreachable("Unknown function for CallInst upgrade.");
3430     }
3431 
3432     if (Rep)
3433       CI->replaceAllUsesWith(Rep);
3434     CI->eraseFromParent();
3435     return;
3436   }
3437 
3438   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3439     // Handle generic mangling change, but nothing else
3440     assert(
3441         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3442         "Unknown function for CallInst upgrade and isn't just a name change");
3443     CI->setCalledFunction(NewFn);
3444   };
3445   CallInst *NewCall = nullptr;
3446   switch (NewFn->getIntrinsicID()) {
3447   default: {
3448     DefaultCase();
3449     return;
3450   }
3451 
3452   case Intrinsic::arm_neon_vld1:
3453   case Intrinsic::arm_neon_vld2:
3454   case Intrinsic::arm_neon_vld3:
3455   case Intrinsic::arm_neon_vld4:
3456   case Intrinsic::arm_neon_vld2lane:
3457   case Intrinsic::arm_neon_vld3lane:
3458   case Intrinsic::arm_neon_vld4lane:
3459   case Intrinsic::arm_neon_vst1:
3460   case Intrinsic::arm_neon_vst2:
3461   case Intrinsic::arm_neon_vst3:
3462   case Intrinsic::arm_neon_vst4:
3463   case Intrinsic::arm_neon_vst2lane:
3464   case Intrinsic::arm_neon_vst3lane:
3465   case Intrinsic::arm_neon_vst4lane: {
3466     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3467                                  CI->arg_operands().end());
3468     NewCall = Builder.CreateCall(NewFn, Args);
3469     break;
3470   }
3471 
3472   case Intrinsic::bitreverse:
3473     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3474     break;
3475 
3476   case Intrinsic::ctlz:
3477   case Intrinsic::cttz:
3478     assert(CI->getNumArgOperands() == 1 &&
3479            "Mismatch between function args and call args");
3480     NewCall =
3481         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3482     break;
3483 
3484   case Intrinsic::objectsize: {
3485     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3486                                    ? Builder.getFalse()
3487                                    : CI->getArgOperand(2);
3488     Value *Dynamic =
3489         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3490     NewCall = Builder.CreateCall(
3491         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3492     break;
3493   }
3494 
3495   case Intrinsic::ctpop:
3496     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3497     break;
3498 
3499   case Intrinsic::convert_from_fp16:
3500     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3501     break;
3502 
3503   case Intrinsic::dbg_value:
3504     // Upgrade from the old version that had an extra offset argument.
3505     assert(CI->getNumArgOperands() == 4);
3506     // Drop nonzero offsets instead of attempting to upgrade them.
3507     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3508       if (Offset->isZeroValue()) {
3509         NewCall = Builder.CreateCall(
3510             NewFn,
3511             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3512         break;
3513       }
3514     CI->eraseFromParent();
3515     return;
3516 
3517   case Intrinsic::x86_xop_vfrcz_ss:
3518   case Intrinsic::x86_xop_vfrcz_sd:
3519     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3520     break;
3521 
3522   case Intrinsic::x86_xop_vpermil2pd:
3523   case Intrinsic::x86_xop_vpermil2ps:
3524   case Intrinsic::x86_xop_vpermil2pd_256:
3525   case Intrinsic::x86_xop_vpermil2ps_256: {
3526     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3527                                  CI->arg_operands().end());
3528     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3529     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3530     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3531     NewCall = Builder.CreateCall(NewFn, Args);
3532     break;
3533   }
3534 
3535   case Intrinsic::x86_sse41_ptestc:
3536   case Intrinsic::x86_sse41_ptestz:
3537   case Intrinsic::x86_sse41_ptestnzc: {
3538     // The arguments for these intrinsics used to be v4f32, and changed
3539     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3540     // So, the only thing required is a bitcast for both arguments.
3541     // First, check the arguments have the old type.
3542     Value *Arg0 = CI->getArgOperand(0);
3543     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3544       return;
3545 
3546     // Old intrinsic, add bitcasts
3547     Value *Arg1 = CI->getArgOperand(1);
3548 
3549     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3550 
3551     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3552     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3553 
3554     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3555     break;
3556   }
3557 
3558   case Intrinsic::x86_rdtscp: {
3559     // This used to take 1 arguments. If we have no arguments, it is already
3560     // upgraded.
3561     if (CI->getNumOperands() == 0)
3562       return;
3563 
3564     NewCall = Builder.CreateCall(NewFn);
3565     // Extract the second result and store it.
3566     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3567     // Cast the pointer to the right type.
3568     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3569                                  llvm::PointerType::getUnqual(Data->getType()));
3570     Builder.CreateAlignedStore(Data, Ptr, 1);
3571     // Replace the original call result with the first result of the new call.
3572     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3573 
3574     std::string Name = CI->getName();
3575     if (!Name.empty()) {
3576       CI->setName(Name + ".old");
3577       NewCall->setName(Name);
3578     }
3579     CI->replaceAllUsesWith(TSC);
3580     CI->eraseFromParent();
3581     return;
3582   }
3583 
3584   case Intrinsic::x86_sse41_insertps:
3585   case Intrinsic::x86_sse41_dppd:
3586   case Intrinsic::x86_sse41_dpps:
3587   case Intrinsic::x86_sse41_mpsadbw:
3588   case Intrinsic::x86_avx_dp_ps_256:
3589   case Intrinsic::x86_avx2_mpsadbw: {
3590     // Need to truncate the last argument from i32 to i8 -- this argument models
3591     // an inherently 8-bit immediate operand to these x86 instructions.
3592     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3593                                  CI->arg_operands().end());
3594 
3595     // Replace the last argument with a trunc.
3596     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3597     NewCall = Builder.CreateCall(NewFn, Args);
3598     break;
3599   }
3600 
3601   case Intrinsic::thread_pointer: {
3602     NewCall = Builder.CreateCall(NewFn, {});
3603     break;
3604   }
3605 
3606   case Intrinsic::invariant_start:
3607   case Intrinsic::invariant_end:
3608   case Intrinsic::masked_load:
3609   case Intrinsic::masked_store:
3610   case Intrinsic::masked_gather:
3611   case Intrinsic::masked_scatter: {
3612     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3613                                  CI->arg_operands().end());
3614     NewCall = Builder.CreateCall(NewFn, Args);
3615     break;
3616   }
3617 
3618   case Intrinsic::memcpy:
3619   case Intrinsic::memmove:
3620   case Intrinsic::memset: {
3621     // We have to make sure that the call signature is what we're expecting.
3622     // We only want to change the old signatures by removing the alignment arg:
3623     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3624     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3625     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3626     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3627     // Note: i8*'s in the above can be any pointer type
3628     if (CI->getNumArgOperands() != 5) {
3629       DefaultCase();
3630       return;
3631     }
3632     // Remove alignment argument (3), and add alignment attributes to the
3633     // dest/src pointers.
3634     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3635                       CI->getArgOperand(2), CI->getArgOperand(4)};
3636     NewCall = Builder.CreateCall(NewFn, Args);
3637     auto *MemCI = cast<MemIntrinsic>(NewCall);
3638     // All mem intrinsics support dest alignment.
3639     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3640     MemCI->setDestAlignment(Align->getZExtValue());
3641     // Memcpy/Memmove also support source alignment.
3642     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3643       MTI->setSourceAlignment(Align->getZExtValue());
3644     break;
3645   }
3646   }
3647   assert(NewCall && "Should have either set this variable or returned through "
3648                     "the default case");
3649   std::string Name = CI->getName();
3650   if (!Name.empty()) {
3651     CI->setName(Name + ".old");
3652     NewCall->setName(Name);
3653   }
3654   CI->replaceAllUsesWith(NewCall);
3655   CI->eraseFromParent();
3656 }
3657 
3658 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3659   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3660 
3661   // Check if this function should be upgraded and get the replacement function
3662   // if there is one.
3663   Function *NewFn;
3664   if (UpgradeIntrinsicFunction(F, NewFn)) {
3665     // Replace all users of the old function with the new function or new
3666     // instructions. This is not a range loop because the call is deleted.
3667     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3668       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3669         UpgradeIntrinsicCall(CI, NewFn);
3670 
3671     // Remove old function, no longer used, from the module.
3672     F->eraseFromParent();
3673   }
3674 }
3675 
3676 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3677   // Check if the tag uses struct-path aware TBAA format.
3678   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3679     return &MD;
3680 
3681   auto &Context = MD.getContext();
3682   if (MD.getNumOperands() == 3) {
3683     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3684     MDNode *ScalarType = MDNode::get(Context, Elts);
3685     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3686     Metadata *Elts2[] = {ScalarType, ScalarType,
3687                          ConstantAsMetadata::get(
3688                              Constant::getNullValue(Type::getInt64Ty(Context))),
3689                          MD.getOperand(2)};
3690     return MDNode::get(Context, Elts2);
3691   }
3692   // Create a MDNode <MD, MD, offset 0>
3693   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3694                                     Type::getInt64Ty(Context)))};
3695   return MDNode::get(Context, Elts);
3696 }
3697 
3698 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3699                                       Instruction *&Temp) {
3700   if (Opc != Instruction::BitCast)
3701     return nullptr;
3702 
3703   Temp = nullptr;
3704   Type *SrcTy = V->getType();
3705   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3706       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3707     LLVMContext &Context = V->getContext();
3708 
3709     // We have no information about target data layout, so we assume that
3710     // the maximum pointer size is 64bit.
3711     Type *MidTy = Type::getInt64Ty(Context);
3712     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3713 
3714     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3715   }
3716 
3717   return nullptr;
3718 }
3719 
3720 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3721   if (Opc != Instruction::BitCast)
3722     return nullptr;
3723 
3724   Type *SrcTy = C->getType();
3725   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3726       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3727     LLVMContext &Context = C->getContext();
3728 
3729     // We have no information about target data layout, so we assume that
3730     // the maximum pointer size is 64bit.
3731     Type *MidTy = Type::getInt64Ty(Context);
3732 
3733     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3734                                      DestTy);
3735   }
3736 
3737   return nullptr;
3738 }
3739 
3740 /// Check the debug info version number, if it is out-dated, drop the debug
3741 /// info. Return true if module is modified.
3742 bool llvm::UpgradeDebugInfo(Module &M) {
3743   unsigned Version = getDebugMetadataVersionFromModule(M);
3744   if (Version == DEBUG_METADATA_VERSION) {
3745     bool BrokenDebugInfo = false;
3746     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3747       report_fatal_error("Broken module found, compilation aborted!");
3748     if (!BrokenDebugInfo)
3749       // Everything is ok.
3750       return false;
3751     else {
3752       // Diagnose malformed debug info.
3753       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3754       M.getContext().diagnose(Diag);
3755     }
3756   }
3757   bool Modified = StripDebugInfo(M);
3758   if (Modified && Version != DEBUG_METADATA_VERSION) {
3759     // Diagnose a version mismatch.
3760     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3761     M.getContext().diagnose(DiagVersion);
3762   }
3763   return Modified;
3764 }
3765 
3766 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3767   bool Changed = false;
3768   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3769   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3770   if (ModRetainReleaseMarker) {
3771     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3772     if (Op) {
3773       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3774       if (ID) {
3775         SmallVector<StringRef, 4> ValueComp;
3776         ID->getString().split(ValueComp, "#");
3777         if (ValueComp.size() == 2) {
3778           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3779           ID = MDString::get(M.getContext(), NewValue);
3780         }
3781         M.addModuleFlag(Module::Error, MarkerKey, ID);
3782         M.eraseNamedMetadata(ModRetainReleaseMarker);
3783         Changed = true;
3784       }
3785     }
3786   }
3787   return Changed;
3788 }
3789 
// Upgrade legacy module flags in place. Returns true if anything changed.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    // Each well-formed module flag entry is {behavior, key, value}.
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // flags used to be Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          // Rebuild the entry with Max behavior, keeping key and value.
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          // Re-join the components with no separator, dropping the spaces.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
3856 
3857 void llvm::UpgradeSectionAttributes(Module &M) {
3858   auto TrimSpaces = [](StringRef Section) -> std::string {
3859     SmallVector<StringRef, 5> Components;
3860     Section.split(Components, ',');
3861 
3862     SmallString<32> Buffer;
3863     raw_svector_ostream OS(Buffer);
3864 
3865     for (auto Component : Components)
3866       OS << ',' << Component.trim();
3867 
3868     return OS.str().substr(1);
3869   };
3870 
3871   for (auto &GV : M.globals()) {
3872     if (!GV.hasSection())
3873       continue;
3874 
3875     StringRef Section = GV.getSection();
3876 
3877     if (!Section.startswith("__DATA, __objc_catlist"))
3878       continue;
3879 
3880     // __DATA, __objc_catlist, regular, no_dead_strip
3881     // __DATA,__objc_catlist,regular,no_dead_strip
3882     GV.setSection(TrimSpaces(Section));
3883   }
3884 }
3885 
3886 static bool isOldLoopArgument(Metadata *MD) {
3887   auto *T = dyn_cast_or_null<MDTuple>(MD);
3888   if (!T)
3889     return false;
3890   if (T->getNumOperands() < 1)
3891     return false;
3892   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3893   if (!S)
3894     return false;
3895   return S->getString().startswith("llvm.vectorizer.");
3896 }
3897 
3898 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3899   StringRef OldPrefix = "llvm.vectorizer.";
3900   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3901 
3902   if (OldTag == "llvm.vectorizer.unroll")
3903     return MDString::get(C, "llvm.loop.interleave.count");
3904 
3905   return MDString::get(
3906       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3907              .str());
3908 }
3909 
3910 static Metadata *upgradeLoopArgument(Metadata *MD) {
3911   auto *T = dyn_cast_or_null<MDTuple>(MD);
3912   if (!T)
3913     return MD;
3914   if (T->getNumOperands() < 1)
3915     return MD;
3916   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3917   if (!OldTag)
3918     return MD;
3919   if (!OldTag->getString().startswith("llvm.vectorizer."))
3920     return MD;
3921 
3922   // This has an old tag.  Upgrade it.
3923   SmallVector<Metadata *, 8> Ops;
3924   Ops.reserve(T->getNumOperands());
3925   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3926   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3927     Ops.push_back(T->getOperand(I));
3928 
3929   return MDTuple::get(T->getContext(), Ops);
3930 }
3931 
3932 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3933   auto *T = dyn_cast<MDTuple>(&N);
3934   if (!T)
3935     return &N;
3936 
3937   if (none_of(T->operands(), isOldLoopArgument))
3938     return &N;
3939 
3940   SmallVector<Metadata *, 8> Ops;
3941   Ops.reserve(T->getNumOperands());
3942   for (Metadata *MD : T->operands())
3943     Ops.push_back(upgradeLoopArgument(MD));
3944 
3945   return MDTuple::get(T->getContext(), Ops);
3946 }
3947