1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
30 #include <cstring>
31 using namespace llvm;
32 
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
34 
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
38                                   Function *&NewFn) {
39   // Check whether this is an old version of the function, which received
40   // v4f32 arguments.
41   Type *Arg0Type = F->getFunctionType()->getParamType(0);
42   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43     return false;
44 
45   // Yes, it's old, replace it with new version.
46   rename(F);
47   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
48   return true;
49 }
50 
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54                                              Function *&NewFn) {
55   // Check that the last argument is an i32.
56   Type *LastArgType = F->getFunctionType()->getParamType(
57      F->getFunctionType()->getNumParams() - 1);
58   if (!LastArgType->isIntegerTy(32))
59     return false;
60 
61   // Move this function aside and map down.
62   rename(F);
63   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
64   return true;
65 }
66 
67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
68   // All of the intrinsics matches below should be marked with which llvm
69   // version started autoupgrading them. At some point in the future we would
70   // like to use this information to remove upgrade code for some older
71   // intrinsics. It is currently undecided how we will determine that future
72   // point.
73   if (Name == "addcarryx.u32" || // Added in 8.0
74       Name == "addcarryx.u64" || // Added in 8.0
75       Name == "addcarry.u32" || // Added in 8.0
76       Name == "addcarry.u64" || // Added in 8.0
77       Name == "subborrow.u32" || // Added in 8.0
78       Name == "subborrow.u64" || // Added in 8.0
79       Name.startswith("sse2.padds.") || // Added in 8.0
80       Name.startswith("sse2.psubs.") || // Added in 8.0
81       Name.startswith("sse2.paddus.") || // Added in 8.0
82       Name.startswith("sse2.psubus.") || // Added in 8.0
83       Name.startswith("avx2.padds.") || // Added in 8.0
84       Name.startswith("avx2.psubs.") || // Added in 8.0
85       Name.startswith("avx2.paddus.") || // Added in 8.0
86       Name.startswith("avx2.psubus.") || // Added in 8.0
87       Name.startswith("avx512.padds.") || // Added in 8.0
88       Name.startswith("avx512.psubs.") || // Added in 8.0
89       Name.startswith("avx512.mask.padds.") || // Added in 8.0
90       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
91       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
92       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
93       Name=="ssse3.pabs.b.128" || // Added in 6.0
94       Name=="ssse3.pabs.w.128" || // Added in 6.0
95       Name=="ssse3.pabs.d.128" || // Added in 6.0
96       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
97       Name.startswith("fma.vfmadd.") || // Added in 7.0
98       Name.startswith("fma.vfmsub.") || // Added in 7.0
99       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
100       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
101       Name.startswith("fma.vfnmadd.") || // Added in 7.0
102       Name.startswith("fma.vfnmsub.") || // Added in 7.0
103       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
104       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
105       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
106       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
107       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
108       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
109       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
110       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
111       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
112       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
113       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
114       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
115       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
116       Name.startswith("avx512.kunpck") || //added in 6.0
117       Name.startswith("avx2.pabs.") || // Added in 6.0
118       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
119       Name.startswith("avx512.broadcastm") || // Added in 6.0
120       Name == "sse.sqrt.ss" || // Added in 7.0
121       Name == "sse2.sqrt.sd" || // Added in 7.0
122       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
123       Name.startswith("avx.sqrt.p") || // Added in 7.0
124       Name.startswith("sse2.sqrt.p") || // Added in 7.0
125       Name.startswith("sse.sqrt.p") || // Added in 7.0
126       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
127       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
128       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
129       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
130       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
131       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
132       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
133       Name.startswith("avx.vperm2f128.") || // Added in 6.0
134       Name == "avx2.vperm2i128" || // Added in 6.0
135       Name == "sse.add.ss" || // Added in 4.0
136       Name == "sse2.add.sd" || // Added in 4.0
137       Name == "sse.sub.ss" || // Added in 4.0
138       Name == "sse2.sub.sd" || // Added in 4.0
139       Name == "sse.mul.ss" || // Added in 4.0
140       Name == "sse2.mul.sd" || // Added in 4.0
141       Name == "sse.div.ss" || // Added in 4.0
142       Name == "sse2.div.sd" || // Added in 4.0
143       Name == "sse41.pmaxsb" || // Added in 3.9
144       Name == "sse2.pmaxs.w" || // Added in 3.9
145       Name == "sse41.pmaxsd" || // Added in 3.9
146       Name == "sse2.pmaxu.b" || // Added in 3.9
147       Name == "sse41.pmaxuw" || // Added in 3.9
148       Name == "sse41.pmaxud" || // Added in 3.9
149       Name == "sse41.pminsb" || // Added in 3.9
150       Name == "sse2.pmins.w" || // Added in 3.9
151       Name == "sse41.pminsd" || // Added in 3.9
152       Name == "sse2.pminu.b" || // Added in 3.9
153       Name == "sse41.pminuw" || // Added in 3.9
154       Name == "sse41.pminud" || // Added in 3.9
155       Name == "avx512.kand.w" || // Added in 7.0
156       Name == "avx512.kandn.w" || // Added in 7.0
157       Name == "avx512.knot.w" || // Added in 7.0
158       Name == "avx512.kor.w" || // Added in 7.0
159       Name == "avx512.kxor.w" || // Added in 7.0
160       Name == "avx512.kxnor.w" || // Added in 7.0
161       Name == "avx512.kortestc.w" || // Added in 7.0
162       Name == "avx512.kortestz.w" || // Added in 7.0
163       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
164       Name.startswith("avx2.pmax") || // Added in 3.9
165       Name.startswith("avx2.pmin") || // Added in 3.9
166       Name.startswith("avx512.mask.pmax") || // Added in 4.0
167       Name.startswith("avx512.mask.pmin") || // Added in 4.0
168       Name.startswith("avx2.vbroadcast") || // Added in 3.8
169       Name.startswith("avx2.pbroadcast") || // Added in 3.8
170       Name.startswith("avx.vpermil.") || // Added in 3.1
171       Name.startswith("sse2.pshuf") || // Added in 3.9
172       Name.startswith("avx512.pbroadcast") || // Added in 3.9
173       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
174       Name.startswith("avx512.mask.movddup") || // Added in 3.9
175       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
176       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
177       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
178       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
179       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
180       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
181       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
182       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
183       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
184       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
185       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
186       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
187       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
188       Name.startswith("avx512.mask.pand.") || // Added in 3.9
189       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
190       Name.startswith("avx512.mask.por.") || // Added in 3.9
191       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
192       Name.startswith("avx512.mask.and.") || // Added in 3.9
193       Name.startswith("avx512.mask.andn.") || // Added in 3.9
194       Name.startswith("avx512.mask.or.") || // Added in 3.9
195       Name.startswith("avx512.mask.xor.") || // Added in 3.9
196       Name.startswith("avx512.mask.padd.") || // Added in 4.0
197       Name.startswith("avx512.mask.psub.") || // Added in 4.0
198       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
199       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
200       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
201       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
202       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
203       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
204       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
205       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
206       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
207       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
208       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
209       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
210       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
211       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
212       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
213       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
214       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
215       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
216       Name == "avx512.cvtusi2sd" || // Added in 7.0
217       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
218       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
219       Name == "sse2.pmulu.dq" || // Added in 7.0
220       Name == "sse41.pmuldq" || // Added in 7.0
221       Name == "avx2.pmulu.dq" || // Added in 7.0
222       Name == "avx2.pmul.dq" || // Added in 7.0
223       Name == "avx512.pmulu.dq.512" || // Added in 7.0
224       Name == "avx512.pmul.dq.512" || // Added in 7.0
225       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
226       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
227       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
228       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
229       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
230       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
231       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
232       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
233       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
234       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
235       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
236       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
237       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
238       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
239       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
240       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
241       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
242       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
243       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
244       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
245       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
246       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
247       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
248       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
249       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
250       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
251       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
252       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
253       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
254       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
255       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
256       Name.startswith("avx512.mask.pslli") || // Added in 4.0
257       Name.startswith("avx512.mask.psrai") || // Added in 4.0
258       Name.startswith("avx512.mask.psrli") || // Added in 4.0
259       Name.startswith("avx512.mask.psllv") || // Added in 4.0
260       Name.startswith("avx512.mask.psrav") || // Added in 4.0
261       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
262       Name.startswith("sse41.pmovsx") || // Added in 3.8
263       Name.startswith("sse41.pmovzx") || // Added in 3.9
264       Name.startswith("avx2.pmovsx") || // Added in 3.9
265       Name.startswith("avx2.pmovzx") || // Added in 3.9
266       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
267       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
268       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
269       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
270       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
271       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
272       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
273       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
274       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
275       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
276       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
277       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
278       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
279       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
280       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
281       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
282       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
283       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
284       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
285       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
286       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
287       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
288       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
289       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
290       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
291       Name.startswith("avx512.vpshld.") || // Added in 8.0
292       Name.startswith("avx512.vpshrd.") || // Added in 8.0
293       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
294       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
295       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
296       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
297       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
298       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
299       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
300       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
301       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
302       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
303       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
304       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
305       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
306       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
307       Name == "sse.cvtsi2ss" || // Added in 7.0
308       Name == "sse.cvtsi642ss" || // Added in 7.0
309       Name == "sse2.cvtsi2sd" || // Added in 7.0
310       Name == "sse2.cvtsi642sd" || // Added in 7.0
311       Name == "sse2.cvtss2sd" || // Added in 7.0
312       Name == "sse2.cvtdq2pd" || // Added in 3.9
313       Name == "sse2.cvtdq2ps" || // Added in 7.0
314       Name == "sse2.cvtps2pd" || // Added in 3.9
315       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
316       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
317       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
318       Name.startswith("avx.vinsertf128.") || // Added in 3.7
319       Name == "avx2.vinserti128" || // Added in 3.7
320       Name.startswith("avx512.mask.insert") || // Added in 4.0
321       Name.startswith("avx.vextractf128.") || // Added in 3.7
322       Name == "avx2.vextracti128" || // Added in 3.7
323       Name.startswith("avx512.mask.vextract") || // Added in 4.0
324       Name.startswith("sse4a.movnt.") || // Added in 3.9
325       Name.startswith("avx.movnt.") || // Added in 3.2
326       Name.startswith("avx512.storent.") || // Added in 3.9
327       Name == "sse41.movntdqa" || // Added in 5.0
328       Name == "avx2.movntdqa" || // Added in 5.0
329       Name == "avx512.movntdqa" || // Added in 5.0
330       Name == "sse2.storel.dq" || // Added in 3.9
331       Name.startswith("sse.storeu.") || // Added in 3.9
332       Name.startswith("sse2.storeu.") || // Added in 3.9
333       Name.startswith("avx.storeu.") || // Added in 3.9
334       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
335       Name.startswith("avx512.mask.store.p") || // Added in 3.9
336       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
337       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
338       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
339       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
340       Name == "avx512.mask.store.ss" || // Added in 7.0
341       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
342       Name.startswith("avx512.mask.load.") || // Added in 3.9
343       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
344       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
345       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
346       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
347       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
348       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
349       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
350       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
351       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
352       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
353       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
354       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
355       Name == "sse42.crc32.64.8" || // Added in 3.4
356       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
357       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
358       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
359       Name.startswith("avx512.mask.valign.") || // Added in 4.0
360       Name.startswith("sse2.psll.dq") || // Added in 3.7
361       Name.startswith("sse2.psrl.dq") || // Added in 3.7
362       Name.startswith("avx2.psll.dq") || // Added in 3.7
363       Name.startswith("avx2.psrl.dq") || // Added in 3.7
364       Name.startswith("avx512.psll.dq") || // Added in 3.9
365       Name.startswith("avx512.psrl.dq") || // Added in 3.9
366       Name == "sse41.pblendw" || // Added in 3.7
367       Name.startswith("sse41.blendp") || // Added in 3.7
368       Name.startswith("avx.blend.p") || // Added in 3.7
369       Name == "avx2.pblendw" || // Added in 3.7
370       Name.startswith("avx2.pblendd.") || // Added in 3.7
371       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
372       Name == "avx2.vbroadcasti128" || // Added in 3.7
373       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
374       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
375       Name == "xop.vpcmov" || // Added in 3.8
376       Name == "xop.vpcmov.256" || // Added in 5.0
377       Name.startswith("avx512.mask.move.s") || // Added in 4.0
378       Name.startswith("avx512.cvtmask2") || // Added in 5.0
379       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
380       Name.startswith("xop.vprot") || // Added in 8.0
381       Name.startswith("avx512.prol") || // Added in 8.0
382       Name.startswith("avx512.pror") || // Added in 8.0
383       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
384       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
385       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
386       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
387       Name.startswith("avx512.ptestm") || //Added in 6.0
388       Name.startswith("avx512.ptestnm") || //Added in 6.0
389       Name.startswith("avx512.mask.pavg")) // Added in 6.0
390     return true;
391 
392   return false;
393 }
394 
395 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
396                                         Function *&NewFn) {
397   // Only handle intrinsics that start with "x86.".
398   if (!Name.startswith("x86."))
399     return false;
400   // Remove "x86." prefix.
401   Name = Name.substr(4);
402 
403   if (ShouldUpgradeX86Intrinsic(F, Name)) {
404     NewFn = nullptr;
405     return true;
406   }
407 
408   if (Name == "rdtscp") { // Added in 8.0
409     // If this intrinsic has 0 operands, it's the new version.
410     if (F->getFunctionType()->getNumParams() == 0)
411       return false;
412 
413     rename(F);
414     NewFn = Intrinsic::getDeclaration(F->getParent(),
415                                       Intrinsic::x86_rdtscp);
416     return true;
417   }
418 
419   // SSE4.1 ptest functions may have an old signature.
420   if (Name.startswith("sse41.ptest")) { // Added in 3.2
421     if (Name.substr(11) == "c")
422       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
423     if (Name.substr(11) == "z")
424       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
425     if (Name.substr(11) == "nzc")
426       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
427   }
428   // Several blend and other instructions with masks used the wrong number of
429   // bits.
430   if (Name == "sse41.insertps") // Added in 3.6
431     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
432                                             NewFn);
433   if (Name == "sse41.dppd") // Added in 3.6
434     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
435                                             NewFn);
436   if (Name == "sse41.dpps") // Added in 3.6
437     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
438                                             NewFn);
439   if (Name == "sse41.mpsadbw") // Added in 3.6
440     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
441                                             NewFn);
442   if (Name == "avx.dp.ps.256") // Added in 3.6
443     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
444                                             NewFn);
445   if (Name == "avx2.mpsadbw") // Added in 3.6
446     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
447                                             NewFn);
448 
449   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
450   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
451     rename(F);
452     NewFn = Intrinsic::getDeclaration(F->getParent(),
453                                       Intrinsic::x86_xop_vfrcz_ss);
454     return true;
455   }
456   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
457     rename(F);
458     NewFn = Intrinsic::getDeclaration(F->getParent(),
459                                       Intrinsic::x86_xop_vfrcz_sd);
460     return true;
461   }
462   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
463   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
464     auto Idx = F->getFunctionType()->getParamType(2);
465     if (Idx->isFPOrFPVectorTy()) {
466       rename(F);
467       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
468       unsigned EltSize = Idx->getScalarSizeInBits();
469       Intrinsic::ID Permil2ID;
470       if (EltSize == 64 && IdxSize == 128)
471         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
472       else if (EltSize == 32 && IdxSize == 128)
473         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
474       else if (EltSize == 64 && IdxSize == 256)
475         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
476       else
477         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
478       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
479       return true;
480     }
481   }
482 
483   if (Name == "seh.recoverfp") {
484     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
485     return true;
486   }
487 
488   return false;
489 }
490 
491 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
492   assert(F && "Illegal to upgrade a non-existent Function.");
493 
494   // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
495   if (F->getName() == "clang.arc.use") {
496     NewFn = nullptr;
497     return true;
498   }
499 
500   // Quickly eliminate it, if it's not a candidate.
501   StringRef Name = F->getName();
502   if (Name.size() <= 8 || !Name.startswith("llvm."))
503     return false;
504   Name = Name.substr(5); // Strip off "llvm."
505 
506   switch (Name[0]) {
507   default: break;
508   case 'a': {
509     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
510       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
511                                         F->arg_begin()->getType());
512       return true;
513     }
514     if (Name.startswith("arm.neon.vclz")) {
515       Type* args[2] = {
516         F->arg_begin()->getType(),
517         Type::getInt1Ty(F->getContext())
518       };
519       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
520       // the end of the name. Change name from llvm.arm.neon.vclz.* to
521       //  llvm.ctlz.*
522       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
523       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
524                                "llvm.ctlz." + Name.substr(14), F->getParent());
525       return true;
526     }
527     if (Name.startswith("arm.neon.vcnt")) {
528       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
529                                         F->arg_begin()->getType());
530       return true;
531     }
532     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
533     if (vldRegex.match(Name)) {
534       auto fArgs = F->getFunctionType()->params();
535       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
536       // Can't use Intrinsic::getDeclaration here as the return types might
537       // then only be structurally equal.
538       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
539       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
540                                "llvm." + Name + ".p0i8", F->getParent());
541       return true;
542     }
543     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
544     if (vstRegex.match(Name)) {
545       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
546                                                 Intrinsic::arm_neon_vst2,
547                                                 Intrinsic::arm_neon_vst3,
548                                                 Intrinsic::arm_neon_vst4};
549 
550       static const Intrinsic::ID StoreLaneInts[] = {
551         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
552         Intrinsic::arm_neon_vst4lane
553       };
554 
555       auto fArgs = F->getFunctionType()->params();
556       Type *Tys[] = {fArgs[0], fArgs[1]};
557       if (Name.find("lane") == StringRef::npos)
558         NewFn = Intrinsic::getDeclaration(F->getParent(),
559                                           StoreInts[fArgs.size() - 3], Tys);
560       else
561         NewFn = Intrinsic::getDeclaration(F->getParent(),
562                                           StoreLaneInts[fArgs.size() - 5], Tys);
563       return true;
564     }
565     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
566       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
567       return true;
568     }
569     if (Name.startswith("aarch64.neon.addp")) {
570       if (F->arg_size() != 2)
571         break; // Invalid IR.
572       auto fArgs = F->getFunctionType()->params();
573       VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
574       if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
575         NewFn = Intrinsic::getDeclaration(F->getParent(),
576                                           Intrinsic::aarch64_neon_faddp, fArgs);
577         return true;
578       }
579     }
580     break;
581   }
582 
583   case 'c': {
584     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
585       rename(F);
586       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
587                                         F->arg_begin()->getType());
588       return true;
589     }
590     if (Name.startswith("cttz.") && F->arg_size() == 1) {
591       rename(F);
592       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
593                                         F->arg_begin()->getType());
594       return true;
595     }
596     break;
597   }
598   case 'd': {
599     if (Name == "dbg.value" && F->arg_size() == 4) {
600       rename(F);
601       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
602       return true;
603     }
604     break;
605   }
606   case 'i':
607   case 'l': {
608     bool IsLifetimeStart = Name.startswith("lifetime.start");
609     if (IsLifetimeStart || Name.startswith("invariant.start")) {
610       Intrinsic::ID ID = IsLifetimeStart ?
611         Intrinsic::lifetime_start : Intrinsic::invariant_start;
612       auto Args = F->getFunctionType()->params();
613       Type* ObjectPtr[1] = {Args[1]};
614       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
615         rename(F);
616         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
617         return true;
618       }
619     }
620 
621     bool IsLifetimeEnd = Name.startswith("lifetime.end");
622     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
623       Intrinsic::ID ID = IsLifetimeEnd ?
624         Intrinsic::lifetime_end : Intrinsic::invariant_end;
625 
626       auto Args = F->getFunctionType()->params();
627       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
628       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
629         rename(F);
630         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
631         return true;
632       }
633     }
634     if (Name.startswith("invariant.group.barrier")) {
635       // Rename invariant.group.barrier to launder.invariant.group
636       auto Args = F->getFunctionType()->params();
637       Type* ObjectPtr[1] = {Args[0]};
638       rename(F);
639       NewFn = Intrinsic::getDeclaration(F->getParent(),
640           Intrinsic::launder_invariant_group, ObjectPtr);
641       return true;
642 
643     }
644 
645     break;
646   }
647   case 'm': {
648     if (Name.startswith("masked.load.")) {
649       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
650       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
651         rename(F);
652         NewFn = Intrinsic::getDeclaration(F->getParent(),
653                                           Intrinsic::masked_load,
654                                           Tys);
655         return true;
656       }
657     }
658     if (Name.startswith("masked.store.")) {
659       auto Args = F->getFunctionType()->params();
660       Type *Tys[] = { Args[0], Args[1] };
661       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
662         rename(F);
663         NewFn = Intrinsic::getDeclaration(F->getParent(),
664                                           Intrinsic::masked_store,
665                                           Tys);
666         return true;
667       }
668     }
669     // Renaming gather/scatter intrinsics with no address space overloading
670     // to the new overload which includes an address space
671     if (Name.startswith("masked.gather.")) {
672       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
673       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
674         rename(F);
675         NewFn = Intrinsic::getDeclaration(F->getParent(),
676                                           Intrinsic::masked_gather, Tys);
677         return true;
678       }
679     }
680     if (Name.startswith("masked.scatter.")) {
681       auto Args = F->getFunctionType()->params();
682       Type *Tys[] = {Args[0], Args[1]};
683       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
684         rename(F);
685         NewFn = Intrinsic::getDeclaration(F->getParent(),
686                                           Intrinsic::masked_scatter, Tys);
687         return true;
688       }
689     }
690     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
691     // alignment parameter to embedding the alignment as an attribute of
692     // the pointer args.
693     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
694       rename(F);
695       // Get the types of dest, src, and len
696       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
697       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
698                                         ParamTypes);
699       return true;
700     }
701     if (Name.startswith("memmove.") && F->arg_size() == 5) {
702       rename(F);
703       // Get the types of dest, src, and len
704       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
705       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
706                                         ParamTypes);
707       return true;
708     }
709     if (Name.startswith("memset.") && F->arg_size() == 5) {
710       rename(F);
711       // Get the types of dest, and len
712       const auto *FT = F->getFunctionType();
713       Type *ParamTypes[2] = {
714           FT->getParamType(0), // Dest
715           FT->getParamType(2)  // len
716       };
717       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
718                                         ParamTypes);
719       return true;
720     }
721     break;
722   }
723   case 'n': {
724     if (Name.startswith("nvvm.")) {
725       Name = Name.substr(5);
726 
727       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
728       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
729                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
730                               .Case("clz.i", Intrinsic::ctlz)
731                               .Case("popc.i", Intrinsic::ctpop)
732                               .Default(Intrinsic::not_intrinsic);
733       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
734         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
735                                           {F->getReturnType()});
736         return true;
737       }
738 
739       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
740       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
741       //
742       // TODO: We could add lohi.i2d.
743       bool Expand = StringSwitch<bool>(Name)
744                         .Cases("abs.i", "abs.ll", true)
745                         .Cases("clz.ll", "popc.ll", "h2f", true)
746                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
747                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
748                         .Default(false);
749       if (Expand) {
750         NewFn = nullptr;
751         return true;
752       }
753     }
754     break;
755   }
756   case 'o':
757     // We only need to change the name to match the mangling including the
758     // address space.
759     if (Name.startswith("objectsize.")) {
760       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
761       if (F->arg_size() == 2 || F->arg_size() == 3 ||
762           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
763         rename(F);
764         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
765                                           Tys);
766         return true;
767       }
768     }
769     break;
770 
771   case 's':
772     if (Name == "stackprotectorcheck") {
773       NewFn = nullptr;
774       return true;
775     }
776     break;
777 
778   case 'x':
779     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
780       return true;
781   }
782   // Remangle our intrinsic since we upgrade the mangling
783   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
784   if (Result != None) {
785     NewFn = Result.getValue();
786     return true;
787   }
788 
789   //  This may not belong here. This function is effectively being overloaded
790   //  to both detect an intrinsic which needs upgrading, and to provide the
791   //  upgraded form of the intrinsic. We should perhaps have two separate
792   //  functions for this.
793   return false;
794 }
795 
796 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
797   NewFn = nullptr;
798   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
799   assert(F != NewFn && "Intrinsic function upgraded to the same function");
800 
801   // Upgrade intrinsic attributes.  This does not change the function.
802   if (NewFn)
803     F = NewFn;
804   if (Intrinsic::ID id = F->getIntrinsicID())
805     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
806   return Upgraded;
807 }
808 
809 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
810   // Nothing to do yet.
811   return false;
812 }
813 
814 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
815 // to byte shuffles.
816 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
817                                          Value *Op, unsigned Shift) {
818   Type *ResultTy = Op->getType();
819   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
820 
821   // Bitcast from a 64-bit element type to a byte element type.
822   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
823   Op = Builder.CreateBitCast(Op, VecTy, "cast");
824 
825   // We'll be shuffling in zeroes.
826   Value *Res = Constant::getNullValue(VecTy);
827 
828   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
829   // we'll just return the zero vector.
830   if (Shift < 16) {
831     uint32_t Idxs[64];
832     // 256/512-bit version is split into 2/4 16-byte lanes.
833     for (unsigned l = 0; l != NumElts; l += 16)
834       for (unsigned i = 0; i != 16; ++i) {
835         unsigned Idx = NumElts + i - Shift;
836         if (Idx < NumElts)
837           Idx -= NumElts - 16; // end of lane, switch operand.
838         Idxs[l + i] = Idx + l;
839       }
840 
841     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
842   }
843 
844   // Bitcast back to a 64-bit element type.
845   return Builder.CreateBitCast(Res, ResultTy, "cast");
846 }
847 
848 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
849 // to byte shuffles.
850 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
851                                          unsigned Shift) {
852   Type *ResultTy = Op->getType();
853   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
854 
855   // Bitcast from a 64-bit element type to a byte element type.
856   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
857   Op = Builder.CreateBitCast(Op, VecTy, "cast");
858 
859   // We'll be shuffling in zeroes.
860   Value *Res = Constant::getNullValue(VecTy);
861 
862   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
863   // we'll just return the zero vector.
864   if (Shift < 16) {
865     uint32_t Idxs[64];
866     // 256/512-bit version is split into 2/4 16-byte lanes.
867     for (unsigned l = 0; l != NumElts; l += 16)
868       for (unsigned i = 0; i != 16; ++i) {
869         unsigned Idx = i + Shift;
870         if (Idx >= 16)
871           Idx += NumElts - 16; // end of lane, switch operand.
872         Idxs[l + i] = Idx + l;
873       }
874 
875     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
876   }
877 
878   // Bitcast back to a 64-bit element type.
879   return Builder.CreateBitCast(Res, ResultTy, "cast");
880 }
881 
882 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
883                             unsigned NumElts) {
884   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
885                              cast<IntegerType>(Mask->getType())->getBitWidth());
886   Mask = Builder.CreateBitCast(Mask, MaskTy);
887 
888   // If we have less than 8 elements, then the starting mask was an i8 and
889   // we need to extract down to the right number of elements.
890   if (NumElts < 8) {
891     uint32_t Indices[4];
892     for (unsigned i = 0; i != NumElts; ++i)
893       Indices[i] = i;
894     Mask = Builder.CreateShuffleVector(Mask, Mask,
895                                        makeArrayRef(Indices, NumElts),
896                                        "extract");
897   }
898 
899   return Mask;
900 }
901 
902 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
903                             Value *Op0, Value *Op1) {
904   // If the mask is all ones just emit the first operation.
905   if (const auto *C = dyn_cast<Constant>(Mask))
906     if (C->isAllOnesValue())
907       return Op0;
908 
909   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
910   return Builder.CreateSelect(Mask, Op0, Op1);
911 }
912 
913 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
914                                   Value *Op0, Value *Op1) {
915   // If the mask is all ones just emit the first operation.
916   if (const auto *C = dyn_cast<Constant>(Mask))
917     if (C->isAllOnesValue())
918       return Op0;
919 
920   llvm::VectorType *MaskTy =
921     llvm::VectorType::get(Builder.getInt1Ty(),
922                           Mask->getType()->getIntegerBitWidth());
923   Mask = Builder.CreateBitCast(Mask, MaskTy);
924   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
925   return Builder.CreateSelect(Mask, Op0, Op1);
926 }
927 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// The concatenation Op0:Op1 is shifted right by ShiftVal elements/bytes and
// the result is merged with Passthru under Mask via EmitX86Select.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount is an immediate operand, so it is always a ConstantInt.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      // For PALIGNR, indices that run off the end of a 16-byte lane must
      // switch to the second shuffle operand (Op0); VALIGN has no lanes and
      // simply wraps, so the adjustment is skipped.
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Merge the shuffled result with the passthru value under the write mask.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
976 
// Upgrade the AVX512 VPERMT2/VPERMI2 variable-permute intrinsics to the
// unified x86_avx512_vpermi2var_* intrinsics followed by an explicit mask
// select.  The target intrinsic is chosen from the result's vector width,
// element width, and float-ness.  IndexForm says the permutation indices are
// already in operand 1 (VPERMI2); otherwise (VPERMT2) operands 0 and 1 are
// swapped to match.  ZeroMask picks zeroing vs. merging mask semantics.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  // Merging semantics pass through the original operand 1 (bitcast to the
  // result type, since the index vector may be an integer type); zeroing
  // semantics substitute a zero vector.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1037 
1038 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1039                                             bool IsSigned, bool IsAddition) {
1040   Type *Ty = CI.getType();
1041   Value *Op0 = CI.getOperand(0);
1042   Value *Op1 = CI.getOperand(1);
1043 
1044   Intrinsic::ID IID =
1045       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1046                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1047   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1048   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1049 
1050   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1051     Value *VecSrc = CI.getOperand(2);
1052     Value *Mask = CI.getOperand(3);
1053     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1054   }
1055   return Res;
1056 }
1057 
1058 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1059                                bool IsRotateRight) {
1060   Type *Ty = CI.getType();
1061   Value *Src = CI.getArgOperand(0);
1062   Value *Amt = CI.getArgOperand(1);
1063 
1064   // Amount may be scalar immediate, in which case create a splat vector.
1065   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1066   // we only care about the lowest log2 bits anyway.
1067   if (Amt->getType() != Ty) {
1068     unsigned NumElts = Ty->getVectorNumElements();
1069     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1070     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1071   }
1072 
1073   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1074   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1075   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1076 
1077   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1078     Value *VecSrc = CI.getOperand(2);
1079     Value *Mask = CI.getOperand(3);
1080     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1081   }
1082   return Res;
1083 }
1084 
1085 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1086                               bool IsSigned) {
1087   Type *Ty = CI.getType();
1088   Value *LHS = CI.getArgOperand(0);
1089   Value *RHS = CI.getArgOperand(1);
1090 
1091   CmpInst::Predicate Pred;
1092   switch (Imm) {
1093   case 0x0:
1094     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1095     break;
1096   case 0x1:
1097     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1098     break;
1099   case 0x2:
1100     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1101     break;
1102   case 0x3:
1103     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1104     break;
1105   case 0x4:
1106     Pred = ICmpInst::ICMP_EQ;
1107     break;
1108   case 0x5:
1109     Pred = ICmpInst::ICMP_NE;
1110     break;
1111   case 0x6:
1112     return Constant::getNullValue(Ty); // FALSE
1113   case 0x7:
1114     return Constant::getAllOnesValue(Ty); // TRUE
1115   default:
1116     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1117   }
1118 
1119   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1120   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1121   return Ext;
1122 }
1123 
1124 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1125                                     bool IsShiftRight, bool ZeroMask) {
1126   Type *Ty = CI.getType();
1127   Value *Op0 = CI.getArgOperand(0);
1128   Value *Op1 = CI.getArgOperand(1);
1129   Value *Amt = CI.getArgOperand(2);
1130 
1131   if (IsShiftRight)
1132     std::swap(Op0, Op1);
1133 
1134   // Amount may be scalar immediate, in which case create a splat vector.
1135   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1136   // we only care about the lowest log2 bits anyway.
1137   if (Amt->getType() != Ty) {
1138     unsigned NumElts = Ty->getVectorNumElements();
1139     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1140     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1141   }
1142 
1143   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1144   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1145   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1146 
1147   unsigned NumArgs = CI.getNumArgOperands();
1148   if (NumArgs >= 4) { // For masked intrinsics.
1149     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1150                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1151                                    CI.getArgOperand(0);
1152     Value *Mask = CI.getOperand(NumArgs - 1);
1153     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1154   }
1155   return Res;
1156 }
1157 
1158 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1159                                  Value *Ptr, Value *Data, Value *Mask,
1160                                  bool Aligned) {
1161   // Cast the pointer to the right type.
1162   Ptr = Builder.CreateBitCast(Ptr,
1163                               llvm::PointerType::getUnqual(Data->getType()));
1164   unsigned Align =
1165     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1166 
1167   // If the mask is all ones just emit a regular store.
1168   if (const auto *C = dyn_cast<Constant>(Mask))
1169     if (C->isAllOnesValue())
1170       return Builder.CreateAlignedStore(Data, Ptr, Align);
1171 
1172   // Convert the mask from an integer type to a vector of i1.
1173   unsigned NumElts = Data->getType()->getVectorNumElements();
1174   Mask = getX86MaskVec(Builder, Mask, NumElts);
1175   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1176 }
1177 
1178 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1179                                 Value *Ptr, Value *Passthru, Value *Mask,
1180                                 bool Aligned) {
1181   Type *ValTy = Passthru->getType();
1182   // Cast the pointer to the right type.
1183   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1184   unsigned Align =
1185     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1186 
1187   // If the mask is all ones just emit a regular store.
1188   if (const auto *C = dyn_cast<Constant>(Mask))
1189     if (C->isAllOnesValue())
1190       return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
1191 
1192   // Convert the mask from an integer type to a vector of i1.
1193   unsigned NumElts = Passthru->getType()->getVectorNumElements();
1194   Mask = getX86MaskVec(Builder, Mask, NumElts);
1195   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1196 }
1197 
1198 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1199   Value *Op0 = CI.getArgOperand(0);
1200   llvm::Type *Ty = Op0->getType();
1201   Value *Zero = llvm::Constant::getNullValue(Ty);
1202   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1203   Value *Neg = Builder.CreateNeg(Op0);
1204   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1205 
1206   if (CI.getNumArgOperands() == 3)
1207     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1208 
1209   return Res;
1210 }
1211 
1212 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1213                                ICmpInst::Predicate Pred) {
1214   Value *Op0 = CI.getArgOperand(0);
1215   Value *Op1 = CI.getArgOperand(1);
1216   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1217   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1218 
1219   if (CI.getNumArgOperands() == 4)
1220     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1221 
1222   return Res;
1223 }
1224 
1225 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1226   Type *Ty = CI.getType();
1227 
1228   // Arguments have a vXi32 type so cast to vXi64.
1229   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1230   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1231 
1232   if (IsSigned) {
1233     // Shift left then arithmetic shift right.
1234     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1235     LHS = Builder.CreateShl(LHS, ShiftAmt);
1236     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1237     RHS = Builder.CreateShl(RHS, ShiftAmt);
1238     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1239   } else {
1240     // Clear the upper bits.
1241     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1242     LHS = Builder.CreateAnd(LHS, Mask);
1243     RHS = Builder.CreateAnd(RHS, Mask);
1244   }
1245 
1246   Value *Res = Builder.CreateMul(LHS, RHS);
1247 
1248   if (CI.getNumArgOperands() == 4)
1249     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1250 
1251   return Res;
1252 }
1253 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Returns the masked i1 vector bitcast to an iN integer, N = max(NumElts, 8).
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    // Skip the AND entirely when the mask is a constant with all bits set.
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad to 8 elements with zeros.  The null vector is the second shuffle
    // operand, so indices >= NumElts select zeros; note the padding index
    // expression parses as NumElts + (i % NumElts), which always lands in
    // the zero operand's range.
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Reinterpret the i1 vector as an integer bitmask of at least 8 bits.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1276 
1277 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1278                                    unsigned CC, bool Signed) {
1279   Value *Op0 = CI.getArgOperand(0);
1280   unsigned NumElts = Op0->getType()->getVectorNumElements();
1281 
1282   Value *Cmp;
1283   if (CC == 3) {
1284     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1285   } else if (CC == 7) {
1286     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1287   } else {
1288     ICmpInst::Predicate Pred;
1289     switch (CC) {
1290     default: llvm_unreachable("Unknown condition code");
1291     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1292     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1293     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1294     case 4: Pred = ICmpInst::ICMP_NE;  break;
1295     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1296     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1297     }
1298     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1299   }
1300 
1301   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1302 
1303   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1304 }
1305 
1306 // Replace a masked intrinsic with an older unmasked intrinsic.
1307 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1308                                     Intrinsic::ID IID) {
1309   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1310   Value *Rep = Builder.CreateCall(Intrin,
1311                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1312   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1313 }
1314 
1315 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1316   Value* A = CI.getArgOperand(0);
1317   Value* B = CI.getArgOperand(1);
1318   Value* Src = CI.getArgOperand(2);
1319   Value* Mask = CI.getArgOperand(3);
1320 
1321   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1322   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1323   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1324   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1325   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1326   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1327 }
1328 
1329 
1330 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1331   Value* Op = CI.getArgOperand(0);
1332   Type* ReturnOp = CI.getType();
1333   unsigned NumElts = CI.getType()->getVectorNumElements();
1334   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1335   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1336 }
1337 
// Replace intrinsic with unmasked version and a select.
//
// Handles "llvm.x86.avx512.mask.*" intrinsics whose masked form is simply the
// corresponding unmasked operation followed by a mask-controlled blend with
// the passthru operand. The target intrinsic is chosen from the triple
// (name prefix, vector width in bits, element width in bits). On a match,
// the unmasked intrinsic is called with all operands except the trailing
// (passthru, mask) pair, the result is blended via EmitX86Select, Rep is set,
// and true is returned. Returns false (Rep untouched) for unrecognized names.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar dispatches on element type (float vs int) as well as widths.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the element-type letter: 'd' (i32) or 'q' (i64).
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    // Name[5] is the element-type letter: 'b' (i8) or 'w' (i16).
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Drop the trailing passthru and mask operands; the unmasked intrinsic
  // takes only the operation's own arguments.
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  // Blend the unmasked result with the passthru operand under the mask.
  unsigned NumArgs = CI.getNumArgOperands();
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1577 
1578 /// Upgrade comment in call to inline asm that represents an objc retain release
1579 /// marker.
1580 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1581   size_t Pos;
1582   if (AsmStr->find("mov\tfp") == 0 &&
1583       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1584       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1585     AsmStr->replace(Pos, 1, ";");
1586   }
1587   return;
1588 }
1589 
1590 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1591 /// provided to seamlessly integrate with existing context.
1592 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1593   Function *F = CI->getCalledFunction();
1594   LLVMContext &C = CI->getContext();
1595   IRBuilder<> Builder(C);
1596   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1597 
1598   assert(F && "Intrinsic call is not direct?");
1599 
1600   if (!NewFn) {
1601     // Get the Function's name.
1602     StringRef Name = F->getName();
1603 
1604     // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
1605     // from upgrader because the optimizer now only recognizes intrinsics for
1606     // ARC runtime calls.
1607     if (Name == "clang.arc.use") {
1608       CI->eraseFromParent();
1609       return;
1610     }
1611 
1612     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1613     Name = Name.substr(5);
1614 
1615     bool IsX86 = Name.startswith("x86.");
1616     if (IsX86)
1617       Name = Name.substr(4);
1618     bool IsNVVM = Name.startswith("nvvm.");
1619     if (IsNVVM)
1620       Name = Name.substr(5);
1621 
1622     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1623       Module *M = F->getParent();
1624       SmallVector<Metadata *, 1> Elts;
1625       Elts.push_back(
1626           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1627       MDNode *Node = MDNode::get(C, Elts);
1628 
1629       Value *Arg0 = CI->getArgOperand(0);
1630       Value *Arg1 = CI->getArgOperand(1);
1631 
1632       // Nontemporal (unaligned) store of the 0'th element of the float/double
1633       // vector.
1634       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1635       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1636       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1637       Value *Extract =
1638           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1639 
1640       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1641       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1642 
1643       // Remove intrinsic.
1644       CI->eraseFromParent();
1645       return;
1646     }
1647 
1648     if (IsX86 && (Name.startswith("avx.movnt.") ||
1649                   Name.startswith("avx512.storent."))) {
1650       Module *M = F->getParent();
1651       SmallVector<Metadata *, 1> Elts;
1652       Elts.push_back(
1653           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1654       MDNode *Node = MDNode::get(C, Elts);
1655 
1656       Value *Arg0 = CI->getArgOperand(0);
1657       Value *Arg1 = CI->getArgOperand(1);
1658 
1659       // Convert the type of the pointer to a pointer to the stored type.
1660       Value *BC = Builder.CreateBitCast(Arg0,
1661                                         PointerType::getUnqual(Arg1->getType()),
1662                                         "cast");
1663       VectorType *VTy = cast<VectorType>(Arg1->getType());
1664       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1665                                                  VTy->getBitWidth() / 8);
1666       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1667 
1668       // Remove intrinsic.
1669       CI->eraseFromParent();
1670       return;
1671     }
1672 
1673     if (IsX86 && Name == "sse2.storel.dq") {
1674       Value *Arg0 = CI->getArgOperand(0);
1675       Value *Arg1 = CI->getArgOperand(1);
1676 
1677       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1678       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1679       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1680       Value *BC = Builder.CreateBitCast(Arg0,
1681                                         PointerType::getUnqual(Elt->getType()),
1682                                         "cast");
1683       Builder.CreateAlignedStore(Elt, BC, 1);
1684 
1685       // Remove intrinsic.
1686       CI->eraseFromParent();
1687       return;
1688     }
1689 
1690     if (IsX86 && (Name.startswith("sse.storeu.") ||
1691                   Name.startswith("sse2.storeu.") ||
1692                   Name.startswith("avx.storeu."))) {
1693       Value *Arg0 = CI->getArgOperand(0);
1694       Value *Arg1 = CI->getArgOperand(1);
1695 
1696       Arg0 = Builder.CreateBitCast(Arg0,
1697                                    PointerType::getUnqual(Arg1->getType()),
1698                                    "cast");
1699       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1700 
1701       // Remove intrinsic.
1702       CI->eraseFromParent();
1703       return;
1704     }
1705 
1706     if (IsX86 && Name == "avx512.mask.store.ss") {
1707       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1708       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1709                          Mask, false);
1710 
1711       // Remove intrinsic.
1712       CI->eraseFromParent();
1713       return;
1714     }
1715 
1716     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1717       // "avx512.mask.storeu." or "avx512.mask.store."
1718       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1719       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1720                          CI->getArgOperand(2), Aligned);
1721 
1722       // Remove intrinsic.
1723       CI->eraseFromParent();
1724       return;
1725     }
1726 
1727     Value *Rep;
1728     // Upgrade packed integer vector compare intrinsics to compare instructions.
1729     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1730                   Name.startswith("avx2.pcmp"))) {
1731       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1732       bool CmpEq = Name[9] == 'e';
1733       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1734                                CI->getArgOperand(0), CI->getArgOperand(1));
1735       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1736     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1737       Type *ExtTy = Type::getInt32Ty(C);
1738       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1739         ExtTy = Type::getInt64Ty(C);
1740       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1741                          ExtTy->getPrimitiveSizeInBits();
1742       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1743       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1744     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1745                          Name == "sse2.sqrt.sd")) {
1746       Value *Vec = CI->getArgOperand(0);
1747       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1748       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1749                                                  Intrinsic::sqrt, Elt0->getType());
1750       Elt0 = Builder.CreateCall(Intr, Elt0);
1751       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1752     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1753                          Name.startswith("sse2.sqrt.p") ||
1754                          Name.startswith("sse.sqrt.p"))) {
1755       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1756                                                          Intrinsic::sqrt,
1757                                                          CI->getType()),
1758                                {CI->getArgOperand(0)});
1759     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1760       if (CI->getNumArgOperands() == 4 &&
1761           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1762            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1763         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1764                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1765 
1766         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1767         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1768                                                            IID), Args);
1769       } else {
1770         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1771                                                            Intrinsic::sqrt,
1772                                                            CI->getType()),
1773                                  {CI->getArgOperand(0)});
1774       }
1775       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1776                           CI->getArgOperand(1));
1777     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1778                          Name.startswith("avx512.ptestnm"))) {
1779       Value *Op0 = CI->getArgOperand(0);
1780       Value *Op1 = CI->getArgOperand(1);
1781       Value *Mask = CI->getArgOperand(2);
1782       Rep = Builder.CreateAnd(Op0, Op1);
1783       llvm::Type *Ty = Op0->getType();
1784       Value *Zero = llvm::Constant::getNullValue(Ty);
1785       ICmpInst::Predicate Pred =
1786         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1787       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1788       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1789     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1790       unsigned NumElts =
1791           CI->getArgOperand(1)->getType()->getVectorNumElements();
1792       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1793       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1794                           CI->getArgOperand(1));
1795     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1796       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1797       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1798       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1799       uint32_t Indices[64];
1800       for (unsigned i = 0; i != NumElts; ++i)
1801         Indices[i] = i;
1802 
1803       // First extract half of each vector. This gives better codegen than
1804       // doing it in a single shuffle.
1805       LHS = Builder.CreateShuffleVector(LHS, LHS,
1806                                         makeArrayRef(Indices, NumElts / 2));
1807       RHS = Builder.CreateShuffleVector(RHS, RHS,
1808                                         makeArrayRef(Indices, NumElts / 2));
1809       // Concat the vectors.
1810       // NOTE: Operands have to be swapped to match intrinsic definition.
1811       Rep = Builder.CreateShuffleVector(RHS, LHS,
1812                                         makeArrayRef(Indices, NumElts));
1813       Rep = Builder.CreateBitCast(Rep, CI->getType());
1814     } else if (IsX86 && Name == "avx512.kand.w") {
1815       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1816       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1817       Rep = Builder.CreateAnd(LHS, RHS);
1818       Rep = Builder.CreateBitCast(Rep, CI->getType());
1819     } else if (IsX86 && Name == "avx512.kandn.w") {
1820       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1821       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1822       LHS = Builder.CreateNot(LHS);
1823       Rep = Builder.CreateAnd(LHS, RHS);
1824       Rep = Builder.CreateBitCast(Rep, CI->getType());
1825     } else if (IsX86 && Name == "avx512.kor.w") {
1826       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1827       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1828       Rep = Builder.CreateOr(LHS, RHS);
1829       Rep = Builder.CreateBitCast(Rep, CI->getType());
1830     } else if (IsX86 && Name == "avx512.kxor.w") {
1831       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1832       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1833       Rep = Builder.CreateXor(LHS, RHS);
1834       Rep = Builder.CreateBitCast(Rep, CI->getType());
1835     } else if (IsX86 && Name == "avx512.kxnor.w") {
1836       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1837       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1838       LHS = Builder.CreateNot(LHS);
1839       Rep = Builder.CreateXor(LHS, RHS);
1840       Rep = Builder.CreateBitCast(Rep, CI->getType());
1841     } else if (IsX86 && Name == "avx512.knot.w") {
1842       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1843       Rep = Builder.CreateNot(Rep);
1844       Rep = Builder.CreateBitCast(Rep, CI->getType());
1845     } else if (IsX86 &&
1846                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1847       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1848       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1849       Rep = Builder.CreateOr(LHS, RHS);
1850       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1851       Value *C;
1852       if (Name[14] == 'c')
1853         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1854       else
1855         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1856       Rep = Builder.CreateICmpEQ(Rep, C);
1857       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1858     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1859                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1860                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1861                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1862       Type *I32Ty = Type::getInt32Ty(C);
1863       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1864                                                  ConstantInt::get(I32Ty, 0));
1865       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1866                                                  ConstantInt::get(I32Ty, 0));
1867       Value *EltOp;
1868       if (Name.contains(".add."))
1869         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1870       else if (Name.contains(".sub."))
1871         EltOp = Builder.CreateFSub(Elt0, Elt1);
1872       else if (Name.contains(".mul."))
1873         EltOp = Builder.CreateFMul(Elt0, Elt1);
1874       else
1875         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1876       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1877                                         ConstantInt::get(I32Ty, 0));
1878     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1879       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1880       bool CmpEq = Name[16] == 'e';
1881       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1882     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1883       Type *OpTy = CI->getArgOperand(0)->getType();
1884       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1885       Intrinsic::ID IID;
1886       switch (VecWidth) {
1887       default: llvm_unreachable("Unexpected intrinsic");
1888       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1889       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1890       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1891       }
1892 
1893       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1894                                { CI->getOperand(0), CI->getArgOperand(1) });
1895       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1896     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1897       Type *OpTy = CI->getArgOperand(0)->getType();
1898       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1899       unsigned EltWidth = OpTy->getScalarSizeInBits();
1900       Intrinsic::ID IID;
1901       if (VecWidth == 128 && EltWidth == 32)
1902         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1903       else if (VecWidth == 256 && EltWidth == 32)
1904         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1905       else if (VecWidth == 512 && EltWidth == 32)
1906         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1907       else if (VecWidth == 128 && EltWidth == 64)
1908         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1909       else if (VecWidth == 256 && EltWidth == 64)
1910         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1911       else if (VecWidth == 512 && EltWidth == 64)
1912         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1913       else
1914         llvm_unreachable("Unexpected intrinsic");
1915 
1916       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1917                                { CI->getOperand(0), CI->getArgOperand(1) });
1918       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1919     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1920       Type *OpTy = CI->getArgOperand(0)->getType();
1921       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1922       unsigned EltWidth = OpTy->getScalarSizeInBits();
1923       Intrinsic::ID IID;
1924       if (VecWidth == 128 && EltWidth == 32)
1925         IID = Intrinsic::x86_avx512_cmp_ps_128;
1926       else if (VecWidth == 256 && EltWidth == 32)
1927         IID = Intrinsic::x86_avx512_cmp_ps_256;
1928       else if (VecWidth == 512 && EltWidth == 32)
1929         IID = Intrinsic::x86_avx512_cmp_ps_512;
1930       else if (VecWidth == 128 && EltWidth == 64)
1931         IID = Intrinsic::x86_avx512_cmp_pd_128;
1932       else if (VecWidth == 256 && EltWidth == 64)
1933         IID = Intrinsic::x86_avx512_cmp_pd_256;
1934       else if (VecWidth == 512 && EltWidth == 64)
1935         IID = Intrinsic::x86_avx512_cmp_pd_512;
1936       else
1937         llvm_unreachable("Unexpected intrinsic");
1938 
1939       SmallVector<Value *, 4> Args;
1940       Args.push_back(CI->getArgOperand(0));
1941       Args.push_back(CI->getArgOperand(1));
1942       Args.push_back(CI->getArgOperand(2));
1943       if (CI->getNumArgOperands() == 5)
1944         Args.push_back(CI->getArgOperand(4));
1945 
1946       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1947                                Args);
1948       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1949     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1950                Name[16] != 'p') {
1951       // Integer compare intrinsics.
1952       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1953       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1954     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1955       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
    } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                         Name.startswith("avx512.cvtw2mask.") ||
                         Name.startswith("avx512.cvtd2mask.") ||
                         Name.startswith("avx512.cvtq2mask."))) {
      // cvt*2mask: an element contributes a set mask bit iff it compares
      // signed-less-than zero, i.e. its sign bit is set.
      Value *Op = CI->getArgOperand(0);
      Value *Zero = llvm::Constant::getNullValue(Op->getType());
      Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
      Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
    } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
                        Name == "ssse3.pabs.w.128" ||
                        Name == "ssse3.pabs.d.128" ||
                        Name.startswith("avx2.pabs") ||
                        Name.startswith("avx512.mask.pabs"))) {
      Rep = upgradeAbs(Builder, *CI);
    } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                         Name == "sse2.pmaxs.w" ||
                         Name == "sse41.pmaxsd" ||
                         Name.startswith("avx2.pmaxs") ||
                         Name.startswith("avx512.mask.pmaxs"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
    } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                         Name == "sse41.pmaxuw" ||
                         Name == "sse41.pmaxud" ||
                         Name.startswith("avx2.pmaxu") ||
                         Name.startswith("avx512.mask.pmaxu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
    } else if (IsX86 && (Name == "sse41.pminsb" ||
                         Name == "sse2.pmins.w" ||
                         Name == "sse41.pminsd" ||
                         Name.startswith("avx2.pmins") ||
                         Name.startswith("avx512.mask.pmins"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
    } else if (IsX86 && (Name == "sse2.pminu.b" ||
                         Name == "sse41.pminuw" ||
                         Name == "sse41.pminud" ||
                         Name.startswith("avx2.pminu") ||
                         Name.startswith("avx512.mask.pminu"))) {
      Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
    } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                         Name == "avx2.pmulu.dq" ||
                         Name == "avx512.pmulu.dq.512" ||
                         Name.startswith("avx512.mask.pmulu.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
    } else if (IsX86 && (Name == "sse41.pmuldq" ||
                         Name == "avx2.pmul.dq" ||
                         Name == "avx512.pmul.dq.512" ||
                         Name.startswith("avx512.mask.pmul.dq."))) {
      Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
    } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                         Name == "sse2.cvtsi2sd" ||
                         Name == "sse.cvtsi642ss" ||
                         Name == "sse2.cvtsi642sd")) {
      // Scalar int->fp convert: convert operand 1 and insert the result into
      // element 0 of operand 0, keeping the remaining elements of operand 0.
      Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "avx512.cvtusi2sd") {
      // Same as above, but the integer source is treated as unsigned.
      Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
                                 CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && Name == "sse2.cvtss2sd") {
      // Scalar float->double: extend element 0 of operand 1 into element 0
      // of operand 0.
      Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
      Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
    } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                         Name == "sse2.cvtdq2ps" ||
                         Name == "avx.cvtdq2.pd.256" ||
                         Name == "avx.cvtdq2.ps.256" ||
                         Name.startswith("avx512.mask.cvtdq2pd.") ||
                         Name.startswith("avx512.mask.cvtudq2pd.") ||
                         Name.startswith("avx512.mask.cvtdq2ps.") ||
                         Name.startswith("avx512.mask.cvtudq2ps.") ||
                         Name.startswith("avx512.mask.cvtqq2pd.") ||
                         Name.startswith("avx512.mask.cvtuqq2pd.") ||
                         Name == "avx512.mask.cvtqq2ps.256" ||
                         Name == "avx512.mask.cvtqq2ps.512" ||
                         Name == "avx512.mask.cvtuqq2ps.256" ||
                         Name == "avx512.mask.cvtuqq2ps.512" ||
                         Name == "sse2.cvtps2pd" ||
                         Name == "avx.cvt.ps2.pd.256" ||
                         Name == "avx512.mask.cvtps2pd.128" ||
                         Name == "avx512.mask.cvtps2pd.256")) {
      Type *DstTy = CI->getType();
      Rep = CI->getArgOperand(0);
      Type *SrcTy = Rep->getType();

      // The destination may have fewer elements than the source (e.g.
      // cvtdq2pd: v4i32 -> v2f64); extract the low lanes first.
      unsigned NumDstElts = DstTy->getVectorNumElements();
      if (NumDstElts < SrcTy->getVectorNumElements()) {
        assert(NumDstElts == 2 && "Unexpected vector size");
        uint32_t ShuffleMask[2] = { 0, 1 };
        Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
      }

      bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
      bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
      if (IsPS2PD)
        Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
      else if (CI->getNumArgOperands() == 4 &&
               (!isa<ConstantInt>(CI->getArgOperand(3)) ||
                cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
        // A 4th operand that is not the default rounding mode (4) requires
        // the rounding-aware conversion intrinsics.
        Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                       : Intrinsic::x86_avx512_sitofp_round;
        Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                                { DstTy, SrcTy });
        Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
      } else {
        Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                         : Builder.CreateSIToFP(Rep, DstTy, "cvt");
      }

      // Masked forms carry a passthru (operand 1) and mask (operand 2).
      if (CI->getNumArgOperands() >= 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1), CI->getArgOperand(2),
                              /*Aligned*/false);
    } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
      Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                              CI->getArgOperand(1),CI->getArgOperand(2),
                              /*Aligned*/true);
    } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
      // Replace with the target-independent masked expand-load intrinsic.
      Type *ResultTy = CI->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_expandload,
                                                ResultTy);
      Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
    } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
      // Replace with the target-independent masked compress-store intrinsic.
      Type *ResultTy = CI->getArgOperand(1)->getType();
      Type *PtrTy = ResultTy->getVectorElementType();

      // Cast the pointer to element type.
      Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                         llvm::PointerType::getUnqual(PtrTy));

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::masked_compressstore,
                                                ResultTy);
      Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
    } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                         Name.startswith("avx512.mask.expand."))) {
      Type *ResultTy = CI->getType();

      Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                     ResultTy->getVectorNumElements());

      // Name[12] is 'c' for "avx512.mask.compress.*" and 'e' for
      // "avx512.mask.expand.*".
      bool IsCompress = Name[12] == 'c';
      Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                     : Intrinsic::x86_avx512_mask_expand;
      Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
      Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                       MaskVec });
    } else if (IsX86 && Name.startswith("xop.vpcom")) {
      bool IsSigned;
      // The name suffix encodes the element type: a 'u' before the width
      // letter (ub/uw/ud/uq) selects the unsigned comparison variants.
      if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
          Name.endswith("uq"))
        IsSigned = false;
      else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
               Name.endswith("q"))
        IsSigned = true;
      else
        llvm_unreachable("Unknown suffix");

      unsigned Imm;
      if (CI->getNumArgOperands() == 3) {
        // Newer form: the condition code is an explicit immediate operand.
        Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      } else {
        // Older form: the condition is spelled out in the intrinsic name.
        Name = Name.substr(9); // strip off "xop.vpcom"
        if (Name.startswith("lt"))
          Imm = 0;
        else if (Name.startswith("le"))
          Imm = 1;
        else if (Name.startswith("gt"))
          Imm = 2;
        else if (Name.startswith("ge"))
          Imm = 3;
        else if (Name.startswith("eq"))
          Imm = 4;
        else if (Name.startswith("ne"))
          Imm = 5;
        else if (Name.startswith("false"))
          Imm = 6;
        else if (Name.startswith("true"))
          Imm = 7;
        else
          llvm_unreachable("Unknown condition");
      }

      Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
    } else if (IsX86 && Name.startswith("xop.vpcmov")) {
      // vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).
      Value *Sel = CI->getArgOperand(2);
      Value *NotSel = Builder.CreateNot(Sel);
      Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
      Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
      Rep = Builder.CreateOr(Sel0, Sel1);
    } else if (IsX86 && (Name.startswith("xop.vprot") ||
                         Name.startswith("avx512.prol") ||
                         Name.startswith("avx512.mask.prol"))) {
      Rep = upgradeX86Rotate(Builder, *CI, false);
    } else if (IsX86 && (Name.startswith("avx512.pror") ||
                         Name.startswith("avx512.mask.pror"))) {
      Rep = upgradeX86Rotate(Builder, *CI, true);
    } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                         Name.startswith("avx512.mask.vpshld") ||
                         Name.startswith("avx512.maskz.vpshld"))) {
      // Name[11] is 'z' only for the "avx512.maskz." spelling; for the other
      // two it is '.' or a letter of "vpshld", so ZeroMask stays false.
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
    } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                         Name.startswith("avx512.mask.vpshrd") ||
                         Name.startswith("avx512.maskz.vpshrd"))) {
      bool ZeroMask = Name[11] == 'z';
      Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
    } else if (IsX86 && Name == "sse42.crc32.64.8") {
      // Lower the i64 crc32 variant to the 32-bit intrinsic: truncate the
      // accumulator, do the 32-bit CRC step, then zero-extend the result.
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                         Name.startswith("avx512.vbroadcast.s"))) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                         Name.startswith("sse41.pmovzx") ||
                         Name.startswith("avx2.pmovsx") ||
                         Name.startswith("avx2.pmovzx") ||
                         Name.startswith("avx512.mask.pmovsx") ||
                         Name.startswith("avx512.mask.pmovzx"))) {
      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
      VectorType *DstTy = cast<VectorType>(CI->getType());
      unsigned NumDstElts = DstTy->getNumElements();

      // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i;

      Value *SV = Builder.CreateShuffleVector(
          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);

      bool DoSext = (StringRef::npos != Name.find("pmovsx"));
      Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                   : Builder.CreateZExt(SV, DstTy);
      // If there are 3 arguments, it's a masked intrinsic so we need a select.
      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (Name == "avx512.mask.pmov.qd.256" ||
               Name == "avx512.mask.pmov.qd.512" ||
               Name == "avx512.mask.pmov.wb.256" ||
               Name == "avx512.mask.pmov.wb.512") {
      // NOTE(review): unlike the sibling branches this one omits the
      // "IsX86 &&" guard — presumably harmless since these names can only
      // occur for x86 intrinsics, but confirm against how Name is derived.
      Type *Ty = CI->getArgOperand(1)->getType();
      Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                         Name == "avx2.vbroadcasti128")) {
      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
      Type *EltTy = CI->getType()->getVectorElementType();
      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
      Type *VT = VectorType::get(EltTy, NumSrcElts);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
      if (NumSrcElts == 2)
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 0, 1 });
      else
        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
    } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                         Name.startswith("avx512.mask.shuf.f"))) {
      // shuf.i/f (SHUFI/F32X4 etc.): the immediate selects whole 128-bit
      // lanes; lanes for the upper half of the result come from source 1.
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      Type *VT = CI->getType();
      unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
      unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
      unsigned ControlBitsMask = NumLanes - 1;
      unsigned NumControlBits = NumLanes / 2;
      SmallVector<uint32_t, 8> ShuffleMask(0);

      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
        // We actually need the other source.
        if (l >= NumLanes / 2)
          LaneMask += NumLanes;
        for (unsigned i = 0; i != NumElementsInLane; ++i)
          ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
      }
      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(1), ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                         Name.startswith("avx512.mask.broadcasti"))) {
      // Subvector broadcast: repeat the source elements to fill the
      // destination, then apply the mask/passthru operands.
      unsigned NumSrcElts =
                        CI->getArgOperand(0)->getType()->getVectorNumElements();
      unsigned NumDstElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
      for (unsigned i = 0; i != NumDstElts; ++i)
        ShuffleMask[i] = i % NumSrcElts;

      Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                        CI->getArgOperand(0),
                                        ShuffleMask);
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                         Name.startswith("avx2.vbroadcast") ||
                         Name.startswith("avx512.pbroadcast") ||
                         Name.startswith("avx512.mask.broadcast.s"))) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));

      if (CI->getNumArgOperands() == 3)
        Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                            CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                         Name.startswith("sse2.psubs.") ||
                         Name.startswith("avx2.padds.") ||
                         Name.startswith("avx2.psubs.") ||
                         Name.startswith("avx512.padds.") ||
                         Name.startswith("avx512.psubs.") ||
                         Name.startswith("avx512.mask.padds.") ||
                         Name.startswith("avx512.mask.psubs."))) {
      bool IsAdd = Name.contains(".padds");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
    } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                         Name.startswith("sse2.psubus.") ||
                         Name.startswith("avx2.paddus.") ||
                         Name.startswith("avx2.psubus.") ||
                         Name.startswith("avx512.mask.paddus.") ||
                         Name.startswith("avx512.mask.psubus."))) {
      bool IsAdd = Name.contains(".paddus");
      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
    } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      false);
    } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
      Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                      CI->getArgOperand(1),
                                      CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4),
                                      true);
    } else if (IsX86 && (Name == "sse2.psll.dq" ||
                         Name == "avx2.psll.dq")) {
      // 128/256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                         Name == "avx2.psrl.dq")) {
      // 128/256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                       Shift / 8); // Shift is in bits.
    } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                         Name == "avx2.psll.dq.bs" ||
                         Name == "avx512.psll.dq.512")) {
      // 128/256/512-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                         Name == "avx2.psrl.dq.bs" ||
                         Name == "avx512.psrl.dq.512")) {
      // 128/256/512-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
    } else if (IsX86 && (Name == "sse41.pblendw" ||
                         Name.startswith("sse41.blendp") ||
                         Name.startswith("avx.blend.p") ||
                         Name == "avx2.pblendw" ||
                         Name.startswith("avx2.pblendd."))) {
      // Immediate blend: bit (i mod 8) of Imm picks the second source for
      // element i, otherwise the first source.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                         Name == "avx2.vinserti128" ||
                         Name.startswith("avx512.mask.insert"))) {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
      unsigned Scale = DstNumElts / SrcNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Extend the second operand into a vector the size of the destination.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i] = i;
      for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
        Idxs[i] = SrcNumElts;
      Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      // First fill with identify mask.
      for (unsigned i = 0; i != DstNumElts; ++i)
        Idxs[i] = i;
      // Then replace the elements where we need to insert.
      for (unsigned i = 0; i != SrcNumElts; ++i)
        Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
      Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 5)
        Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                            CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                         Name == "avx2.vextracti128" ||
                         Name.startswith("avx512.mask.vextract"))) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned DstNumElts = CI->getType()->getVectorNumElements();
      unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
      unsigned Scale = SrcNumElts / DstNumElts;

      // Mask off the high bits of the immediate value; hardware ignores those.
      Imm = Imm % Scale;

      // Get indexes for the subvector of the input vector.
      SmallVector<uint32_t, 8> Idxs(DstNumElts);
      for (unsigned i = 0; i != DstNumElts; ++i) {
        Idxs[i] = i + (Imm * DstNumElts);
      }
      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // If the intrinsic has a mask operand, handle that.
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (!IsX86 && Name == "stackprotectorcheck") {
      // llvm.stackprotectorcheck no longer exists; a null Rep signals the
      // caller of this chain to drop the call (handling is outside this view).
      Rep = nullptr;
    } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                         Name.startswith("avx512.mask.perm.di."))) {
      // vpermq/vpermpd-style permute: each 2-bit field of the immediate
      // selects an element within every aligned group of 4 elements.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<uint32_t, 8> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                         Name == "avx2.vperm2i128")) {
      // The immediate permute control byte looks like this:
      //    [1:0] - select 128 bits from sources for low half of destination
      //    [2]   - ignore
      //    [3]   - zero low half of destination
      //    [5:4] - select 128 bits from sources for high half of destination
      //    [6]   - ignore
      //    [7]   - zero high half of destination

      uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();

      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned HalfSize = NumElts / 2;
      SmallVector<uint32_t, 8> ShuffleMask(NumElts);

      // Determine which operand(s) are actually in use for this instruction.
      Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
      Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);

      // If needed, replace operands based on zero mask.
      V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
      V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;

      // Permute low half of result.
      unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i] = StartIndex + i;

      // Permute high half of result.
      StartIndex = (Imm & 0x10) ? HalfSize : 0;
      for (unsigned i = 0; i < HalfSize; ++i)
        ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;

      Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);

2493     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2494                          Name == "sse2.pshuf.d" ||
2495                          Name.startswith("avx512.mask.vpermil.p") ||
2496                          Name.startswith("avx512.mask.pshuf.d."))) {
2497       Value *Op0 = CI->getArgOperand(0);
2498       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2499       VectorType *VecTy = cast<VectorType>(CI->getType());
2500       unsigned NumElts = VecTy->getNumElements();
2501       // Calculate the size of each index in the immediate.
2502       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2503       unsigned IdxMask = ((1 << IdxSize) - 1);
2504 
2505       SmallVector<uint32_t, 8> Idxs(NumElts);
2506       // Lookup the bits for this element, wrapping around the immediate every
2507       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2508       // to offset by the first index of each group.
2509       for (unsigned i = 0; i != NumElts; ++i)
2510         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2511 
2512       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2513 
2514       if (CI->getNumArgOperands() == 4)
2515         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2516                             CI->getArgOperand(2));
2517     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2518                          Name.startswith("avx512.mask.pshufl.w."))) {
2519       Value *Op0 = CI->getArgOperand(0);
2520       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2521       unsigned NumElts = CI->getType()->getVectorNumElements();
2522 
2523       SmallVector<uint32_t, 16> Idxs(NumElts);
2524       for (unsigned l = 0; l != NumElts; l += 8) {
2525         for (unsigned i = 0; i != 4; ++i)
2526           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2527         for (unsigned i = 4; i != 8; ++i)
2528           Idxs[i + l] = i + l;
2529       }
2530 
2531       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2532 
2533       if (CI->getNumArgOperands() == 4)
2534         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2535                             CI->getArgOperand(2));
2536     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2537                          Name.startswith("avx512.mask.pshufh.w."))) {
2538       Value *Op0 = CI->getArgOperand(0);
2539       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2540       unsigned NumElts = CI->getType()->getVectorNumElements();
2541 
2542       SmallVector<uint32_t, 16> Idxs(NumElts);
2543       for (unsigned l = 0; l != NumElts; l += 8) {
2544         for (unsigned i = 0; i != 4; ++i)
2545           Idxs[i + l] = i + l;
2546         for (unsigned i = 0; i != 4; ++i)
2547           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2548       }
2549 
2550       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2551 
2552       if (CI->getNumArgOperands() == 4)
2553         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2554                             CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      // SHUFP{S,D}: build a two-source, per-lane shuffle mask from the
      // immediate, then apply the writemask.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      // Writemask: arg 4 is the mask, arg 3 the passthru vector.
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      // MOVDDUP/MOVSLDUP duplicate the even elements of each pair and
      // MOVSHDUP the odd ones; model both as a shuffle with Offset 0 or 1.
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          // Both elements of the pair read the same source element.
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      // UNPCKL: interleave the low halves of each 128-bit lane of the two
      // sources, expressed as a two-source shufflevector.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          // Alternate sources: even i from Op0, odd i from Op1 (+NumElts).
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      // UNPCKH: same as above but interleaving the high halves of each lane
      // (hence the extra NumLaneElts/2 bias in the index).
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      // Masked AND: bitcast (possibly FP) operands to an integer vector of
      // the same width, perform the logic op, cast back, then writemask.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      // Masked ANDN: (~Op0) & Op1, via the same bitcast-to-integer scheme.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      // Masked OR.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      // Masked XOR.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      // Masked integer add/sub/mul lower directly to the IR operation
      // followed by a mask-driven select (mask arg 3, passthru arg 2).
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      // Masked FP add: the 512-bit forms carry a rounding-mode operand
      // (arg 4) and are kept as target intrinsics; narrower widths lower to
      // a plain IR fadd.  Either way the writemask select is applied after.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's') // Name[17] distinguishes ps from pd.
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      // Masked FP divide (same 512-bit rounding-operand handling as above).
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      // Masked FP multiply.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      // Masked FP subtract.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      // 512-bit masked FP min/max: forward to the unmasked target intrinsic
      // (which keeps the SAE operand, arg 4) and writemask the result.
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i'; // "min" vs "max" differ at index 13.
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      // Masked LZCNT becomes llvm.ctlz with is_zero_undef=false (LZCNT is
      // defined for a zero input), then the writemask select.
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      // Decode the shift variant from the name.  After the 16-character
      // "avx512.mask.psll" prefix, an 'i' marks an immediate-count shift and
      // a 'v' a per-element variable shift; the element-size letter is the
      // character that follows the next '.'.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Variable shifts spell the element count in the name rather than a
        // .128/.256/.512 suffix; dispatch on count digit + element letter.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        // Remaining cases are the 512-bit forms.
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      // Emit the unmasked shift intrinsic plus the writemask select.
      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      // Logical right shifts; the name decoding follows the same scheme as
      // the psll handling: 'i' = immediate count, 'v' = variable count, and
      // the element-size letter follows the next '.'.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      // Arithmetic right shifts; the name decoding follows the same scheme
      // as the psll/psrl handling above.  Note that 128/256-bit 'q' forms
      // only exist as AVX512 intrinsics (there is no SSE2/AVX2 PSRAQ).
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      // Masked scalar move: handled by a shared helper.
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      // Mask-register to integer-vector conversion: shared helper.
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      // MOVNTDQA: replace with an ordinary aligned load carrying
      // !nontemporal metadata (value 1).
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      // Alignment is the full vector width in bytes.
      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                         Name.startswith("fma.vfmsub.") ||
                         Name.startswith("fma.vfnmadd.") ||
                         Name.startswith("fma.vfnmsub."))) {
      // FMA3: lower to llvm.fma with the appropriate operands negated.
      // Character positions in the name encode the variant ('n' = negated
      // multiply, 's' = subtract accumulator, trailing 's' = scalar form).
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      // Scalar forms operate on element 0 only.
      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      // For scalar, negate the multiplicand after the extract (operand 1);
      // for vector, operand 0.  The product's sign is the same either way.
      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      // Scalar result goes back into element 0 of the first source vector.
      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
      // FMA4 scalar: like the above, but the upper elements of the result
      // are zeroed rather than taken from the first source.
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      // AVX512 masked scalar FMA.  '3' / 'z' at index 11 distinguish the
      // mask3 (passthru = accumulator) and maskz (passthru = zero) variants.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Negate whichever multiplicand is NOT also the passthru source, so
      // the passthru value stays unnegated.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      // A rounding operand other than 4 requires the target intrinsic that
      // carries it; otherwise plain llvm.fma suffices.
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      // Select on mask bit 0, then reinsert the scalar into the vector whose
      // upper elements the original intrinsic preserved.
      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
      // AVX512 masked packed FMA; variant decoding mirrors the scalar case.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      // Keep the passthru source unnegated (see the scalar FMA handling).
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      // Only 512-bit forms have 5 operands; a non-default rounding operand
      // (!= 4) forces the rounding-aware target intrinsic.
      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      // maskz: zero passthru; mask3: accumulator; mask: first source.
      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
                         Name.startswith("fma.vfmsubadd.p"))) {
      // FMADDSUB/FMSUBADD: compute both fma(a,b,c) and fma(a,b,-c), then
      // blend them element-by-element with a shuffle (even lanes from one,
      // odd lanes from the other).
      bool IsSubAdd = Name[7] == 's';
      int NumElts = CI->getType()->getVectorNumElements();

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      // SUBADD is the mirror image: swap which result feeds even/odd lanes.
      if (IsSubAdd)
        std::swap(Even, Odd);

      // Index i for even lanes (Even vector), i + NumElts for odd (Odd).
      SmallVector<uint32_t, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3166     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3167                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3168                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3169                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3170       bool IsMask3 = Name[11] == '3';
3171       bool IsMaskZ = Name[11] == 'z';
3172       // Drop the "avx512.mask." to make it easier.
3173       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3174       bool IsSubAdd = Name[3] == 's';
3175       if (CI->getNumArgOperands() == 5 &&
3176           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3177            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3178         Intrinsic::ID IID;
3179         // Check the character before ".512" in string.
3180         if (Name[Name.size()-5] == 's')
3181           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3182         else
3183           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3184 
3185         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3186                          CI->getArgOperand(2), CI->getArgOperand(4) };
3187         if (IsSubAdd)
3188           Ops[2] = Builder.CreateFNeg(Ops[2]);
3189 
3190         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3191                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3192                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3193       } else {
3194         int NumElts = CI->getType()->getVectorNumElements();
3195 
3196         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3197                          CI->getArgOperand(2) };
3198 
3199         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3200                                                   Ops[0]->getType());
3201         Value *Odd = Builder.CreateCall(FMA, Ops);
3202         Ops[2] = Builder.CreateFNeg(Ops[2]);
3203         Value *Even = Builder.CreateCall(FMA, Ops);
3204 
3205         if (IsSubAdd)
3206           std::swap(Even, Odd);
3207 
3208         SmallVector<uint32_t, 32> Idxs(NumElts);
3209         for (int i = 0; i != NumElts; ++i)
3210           Idxs[i] = i + (i % 2) * NumElts;
3211 
3212         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3213       }
3214 
3215       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3216                         IsMask3 ? CI->getArgOperand(2) :
3217                                   CI->getArgOperand(0);
3218 
3219       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3220     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3221                          Name.startswith("avx512.maskz.pternlog."))) {
3222       bool ZeroMask = Name[11] == 'z';
3223       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3224       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3225       Intrinsic::ID IID;
3226       if (VecWidth == 128 && EltWidth == 32)
3227         IID = Intrinsic::x86_avx512_pternlog_d_128;
3228       else if (VecWidth == 256 && EltWidth == 32)
3229         IID = Intrinsic::x86_avx512_pternlog_d_256;
3230       else if (VecWidth == 512 && EltWidth == 32)
3231         IID = Intrinsic::x86_avx512_pternlog_d_512;
3232       else if (VecWidth == 128 && EltWidth == 64)
3233         IID = Intrinsic::x86_avx512_pternlog_q_128;
3234       else if (VecWidth == 256 && EltWidth == 64)
3235         IID = Intrinsic::x86_avx512_pternlog_q_256;
3236       else if (VecWidth == 512 && EltWidth == 64)
3237         IID = Intrinsic::x86_avx512_pternlog_q_512;
3238       else
3239         llvm_unreachable("Unexpected intrinsic");
3240 
3241       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3242                         CI->getArgOperand(2), CI->getArgOperand(3) };
3243       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3244                                Args);
3245       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3246                                  : CI->getArgOperand(0);
3247       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3248     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3249                          Name.startswith("avx512.maskz.vpmadd52"))) {
3250       bool ZeroMask = Name[11] == 'z';
3251       bool High = Name[20] == 'h' || Name[21] == 'h';
3252       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3253       Intrinsic::ID IID;
3254       if (VecWidth == 128 && !High)
3255         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3256       else if (VecWidth == 256 && !High)
3257         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3258       else if (VecWidth == 512 && !High)
3259         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3260       else if (VecWidth == 128 && High)
3261         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3262       else if (VecWidth == 256 && High)
3263         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3264       else if (VecWidth == 512 && High)
3265         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3266       else
3267         llvm_unreachable("Unexpected intrinsic");
3268 
3269       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3270                         CI->getArgOperand(2) };
3271       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3272                                Args);
3273       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3274                                  : CI->getArgOperand(0);
3275       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3276     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3277                          Name.startswith("avx512.mask.vpermt2var.") ||
3278                          Name.startswith("avx512.maskz.vpermt2var."))) {
3279       bool ZeroMask = Name[11] == 'z';
3280       bool IndexForm = Name[17] == 'i';
3281       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3282     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3283                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3284                          Name.startswith("avx512.mask.vpdpbusds.") ||
3285                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3286       bool ZeroMask = Name[11] == 'z';
3287       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3288       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3289       Intrinsic::ID IID;
3290       if (VecWidth == 128 && !IsSaturating)
3291         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3292       else if (VecWidth == 256 && !IsSaturating)
3293         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3294       else if (VecWidth == 512 && !IsSaturating)
3295         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3296       else if (VecWidth == 128 && IsSaturating)
3297         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3298       else if (VecWidth == 256 && IsSaturating)
3299         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3300       else if (VecWidth == 512 && IsSaturating)
3301         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3302       else
3303         llvm_unreachable("Unexpected intrinsic");
3304 
3305       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3306                         CI->getArgOperand(2)  };
3307       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3308                                Args);
3309       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3310                                  : CI->getArgOperand(0);
3311       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3312     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3313                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3314                          Name.startswith("avx512.mask.vpdpwssds.") ||
3315                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3316       bool ZeroMask = Name[11] == 'z';
3317       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3318       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3319       Intrinsic::ID IID;
3320       if (VecWidth == 128 && !IsSaturating)
3321         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3322       else if (VecWidth == 256 && !IsSaturating)
3323         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3324       else if (VecWidth == 512 && !IsSaturating)
3325         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3326       else if (VecWidth == 128 && IsSaturating)
3327         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3328       else if (VecWidth == 256 && IsSaturating)
3329         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3330       else if (VecWidth == 512 && IsSaturating)
3331         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3332       else
3333         llvm_unreachable("Unexpected intrinsic");
3334 
3335       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3336                         CI->getArgOperand(2)  };
3337       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3338                                Args);
3339       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3340                                  : CI->getArgOperand(0);
3341       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3342     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3343                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3344                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3345       Intrinsic::ID IID;
3346       if (Name[0] == 'a' && Name.back() == '2')
3347         IID = Intrinsic::x86_addcarry_32;
3348       else if (Name[0] == 'a' && Name.back() == '4')
3349         IID = Intrinsic::x86_addcarry_64;
3350       else if (Name[0] == 's' && Name.back() == '2')
3351         IID = Intrinsic::x86_subborrow_32;
3352       else if (Name[0] == 's' && Name.back() == '4')
3353         IID = Intrinsic::x86_subborrow_64;
3354       else
3355         llvm_unreachable("Unexpected intrinsic");
3356 
3357       // Make a call with 3 operands.
3358       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3359                         CI->getArgOperand(2)};
3360       Value *NewCall = Builder.CreateCall(
3361                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3362                                 Args);
3363 
3364       // Extract the second result and store it.
3365       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3366       // Cast the pointer to the right type.
3367       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3368                                  llvm::PointerType::getUnqual(Data->getType()));
3369       Builder.CreateAlignedStore(Data, Ptr, 1);
3370       // Replace the original call result with the first result of the new call.
3371       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3372 
3373       CI->replaceAllUsesWith(CF);
3374       Rep = nullptr;
3375     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3376                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3377       // Rep will be updated by the call in the condition.
3378     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3379       Value *Arg = CI->getArgOperand(0);
3380       Value *Neg = Builder.CreateNeg(Arg, "neg");
3381       Value *Cmp = Builder.CreateICmpSGE(
3382           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3383       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3384     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3385                           Name == "max.ui" || Name == "max.ull")) {
3386       Value *Arg0 = CI->getArgOperand(0);
3387       Value *Arg1 = CI->getArgOperand(1);
3388       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3389                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3390                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3391       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3392     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3393                           Name == "min.ui" || Name == "min.ull")) {
3394       Value *Arg0 = CI->getArgOperand(0);
3395       Value *Arg1 = CI->getArgOperand(1);
3396       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3397                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3398                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3399       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3400     } else if (IsNVVM && Name == "clz.ll") {
3401       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3402       Value *Arg = CI->getArgOperand(0);
3403       Value *Ctlz = Builder.CreateCall(
3404           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3405                                     {Arg->getType()}),
3406           {Arg, Builder.getFalse()}, "ctlz");
3407       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3408     } else if (IsNVVM && Name == "popc.ll") {
3409       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3410       // i64.
3411       Value *Arg = CI->getArgOperand(0);
3412       Value *Popc = Builder.CreateCall(
3413           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3414                                     {Arg->getType()}),
3415           Arg, "ctpop");
3416       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3417     } else if (IsNVVM && Name == "h2f") {
3418       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3419                                    F->getParent(), Intrinsic::convert_from_fp16,
3420                                    {Builder.getFloatTy()}),
3421                                CI->getArgOperand(0), "h2f");
3422     } else {
3423       llvm_unreachable("Unknown function for CallInst upgrade.");
3424     }
3425 
3426     if (Rep)
3427       CI->replaceAllUsesWith(Rep);
3428     CI->eraseFromParent();
3429     return;
3430   }
3431 
3432   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3433     // Handle generic mangling change, but nothing else
3434     assert(
3435         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3436         "Unknown function for CallInst upgrade and isn't just a name change");
3437     CI->setCalledFunction(NewFn);
3438   };
3439   CallInst *NewCall = nullptr;
3440   switch (NewFn->getIntrinsicID()) {
3441   default: {
3442     DefaultCase();
3443     return;
3444   }
3445 
3446   case Intrinsic::arm_neon_vld1:
3447   case Intrinsic::arm_neon_vld2:
3448   case Intrinsic::arm_neon_vld3:
3449   case Intrinsic::arm_neon_vld4:
3450   case Intrinsic::arm_neon_vld2lane:
3451   case Intrinsic::arm_neon_vld3lane:
3452   case Intrinsic::arm_neon_vld4lane:
3453   case Intrinsic::arm_neon_vst1:
3454   case Intrinsic::arm_neon_vst2:
3455   case Intrinsic::arm_neon_vst3:
3456   case Intrinsic::arm_neon_vst4:
3457   case Intrinsic::arm_neon_vst2lane:
3458   case Intrinsic::arm_neon_vst3lane:
3459   case Intrinsic::arm_neon_vst4lane: {
3460     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3461                                  CI->arg_operands().end());
3462     NewCall = Builder.CreateCall(NewFn, Args);
3463     break;
3464   }
3465 
3466   case Intrinsic::bitreverse:
3467     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3468     break;
3469 
3470   case Intrinsic::ctlz:
3471   case Intrinsic::cttz:
3472     assert(CI->getNumArgOperands() == 1 &&
3473            "Mismatch between function args and call args");
3474     NewCall =
3475         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3476     break;
3477 
3478   case Intrinsic::objectsize: {
3479     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3480                                    ? Builder.getFalse()
3481                                    : CI->getArgOperand(2);
3482     Value *Dynamic =
3483         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3484     NewCall = Builder.CreateCall(
3485         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3486     break;
3487   }
3488 
3489   case Intrinsic::ctpop:
3490     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3491     break;
3492 
3493   case Intrinsic::convert_from_fp16:
3494     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3495     break;
3496 
3497   case Intrinsic::dbg_value:
3498     // Upgrade from the old version that had an extra offset argument.
3499     assert(CI->getNumArgOperands() == 4);
3500     // Drop nonzero offsets instead of attempting to upgrade them.
3501     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3502       if (Offset->isZeroValue()) {
3503         NewCall = Builder.CreateCall(
3504             NewFn,
3505             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3506         break;
3507       }
3508     CI->eraseFromParent();
3509     return;
3510 
3511   case Intrinsic::x86_xop_vfrcz_ss:
3512   case Intrinsic::x86_xop_vfrcz_sd:
3513     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3514     break;
3515 
3516   case Intrinsic::x86_xop_vpermil2pd:
3517   case Intrinsic::x86_xop_vpermil2ps:
3518   case Intrinsic::x86_xop_vpermil2pd_256:
3519   case Intrinsic::x86_xop_vpermil2ps_256: {
3520     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3521                                  CI->arg_operands().end());
3522     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3523     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3524     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3525     NewCall = Builder.CreateCall(NewFn, Args);
3526     break;
3527   }
3528 
3529   case Intrinsic::x86_sse41_ptestc:
3530   case Intrinsic::x86_sse41_ptestz:
3531   case Intrinsic::x86_sse41_ptestnzc: {
3532     // The arguments for these intrinsics used to be v4f32, and changed
3533     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3534     // So, the only thing required is a bitcast for both arguments.
3535     // First, check the arguments have the old type.
3536     Value *Arg0 = CI->getArgOperand(0);
3537     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3538       return;
3539 
3540     // Old intrinsic, add bitcasts
3541     Value *Arg1 = CI->getArgOperand(1);
3542 
3543     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3544 
3545     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3546     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3547 
3548     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3549     break;
3550   }
3551 
3552   case Intrinsic::x86_rdtscp: {
3553     // This used to take 1 arguments. If we have no arguments, it is already
3554     // upgraded.
3555     if (CI->getNumOperands() == 0)
3556       return;
3557 
3558     NewCall = Builder.CreateCall(NewFn);
3559     // Extract the second result and store it.
3560     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3561     // Cast the pointer to the right type.
3562     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3563                                  llvm::PointerType::getUnqual(Data->getType()));
3564     Builder.CreateAlignedStore(Data, Ptr, 1);
3565     // Replace the original call result with the first result of the new call.
3566     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3567 
3568     std::string Name = CI->getName();
3569     if (!Name.empty()) {
3570       CI->setName(Name + ".old");
3571       NewCall->setName(Name);
3572     }
3573     CI->replaceAllUsesWith(TSC);
3574     CI->eraseFromParent();
3575     return;
3576   }
3577 
3578   case Intrinsic::x86_sse41_insertps:
3579   case Intrinsic::x86_sse41_dppd:
3580   case Intrinsic::x86_sse41_dpps:
3581   case Intrinsic::x86_sse41_mpsadbw:
3582   case Intrinsic::x86_avx_dp_ps_256:
3583   case Intrinsic::x86_avx2_mpsadbw: {
3584     // Need to truncate the last argument from i32 to i8 -- this argument models
3585     // an inherently 8-bit immediate operand to these x86 instructions.
3586     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3587                                  CI->arg_operands().end());
3588 
3589     // Replace the last argument with a trunc.
3590     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3591     NewCall = Builder.CreateCall(NewFn, Args);
3592     break;
3593   }
3594 
3595   case Intrinsic::thread_pointer: {
3596     NewCall = Builder.CreateCall(NewFn, {});
3597     break;
3598   }
3599 
3600   case Intrinsic::invariant_start:
3601   case Intrinsic::invariant_end:
3602   case Intrinsic::masked_load:
3603   case Intrinsic::masked_store:
3604   case Intrinsic::masked_gather:
3605   case Intrinsic::masked_scatter: {
3606     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3607                                  CI->arg_operands().end());
3608     NewCall = Builder.CreateCall(NewFn, Args);
3609     break;
3610   }
3611 
3612   case Intrinsic::memcpy:
3613   case Intrinsic::memmove:
3614   case Intrinsic::memset: {
3615     // We have to make sure that the call signature is what we're expecting.
3616     // We only want to change the old signatures by removing the alignment arg:
3617     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3618     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3619     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3620     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3621     // Note: i8*'s in the above can be any pointer type
3622     if (CI->getNumArgOperands() != 5) {
3623       DefaultCase();
3624       return;
3625     }
3626     // Remove alignment argument (3), and add alignment attributes to the
3627     // dest/src pointers.
3628     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3629                       CI->getArgOperand(2), CI->getArgOperand(4)};
3630     NewCall = Builder.CreateCall(NewFn, Args);
3631     auto *MemCI = cast<MemIntrinsic>(NewCall);
3632     // All mem intrinsics support dest alignment.
3633     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3634     MemCI->setDestAlignment(Align->getZExtValue());
3635     // Memcpy/Memmove also support source alignment.
3636     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3637       MTI->setSourceAlignment(Align->getZExtValue());
3638     break;
3639   }
3640   }
3641   assert(NewCall && "Should have either set this variable or returned through "
3642                     "the default case");
3643   std::string Name = CI->getName();
3644   if (!Name.empty()) {
3645     CI->setName(Name + ".old");
3646     NewCall->setName(Name);
3647   }
3648   CI->replaceAllUsesWith(NewCall);
3649   CI->eraseFromParent();
3650 }
3651 
3652 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3653   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3654 
3655   // Check if this function should be upgraded and get the replacement function
3656   // if there is one.
3657   Function *NewFn;
3658   if (UpgradeIntrinsicFunction(F, NewFn)) {
3659     // Replace all users of the old function with the new function or new
3660     // instructions. This is not a range loop because the call is deleted.
3661     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3662       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3663         UpgradeIntrinsicCall(CI, NewFn);
3664 
3665     // Remove old function, no longer used, from the module.
3666     F->eraseFromParent();
3667   }
3668 }
3669 
3670 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3671   // Check if the tag uses struct-path aware TBAA format.
3672   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3673     return &MD;
3674 
3675   auto &Context = MD.getContext();
3676   if (MD.getNumOperands() == 3) {
3677     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3678     MDNode *ScalarType = MDNode::get(Context, Elts);
3679     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3680     Metadata *Elts2[] = {ScalarType, ScalarType,
3681                          ConstantAsMetadata::get(
3682                              Constant::getNullValue(Type::getInt64Ty(Context))),
3683                          MD.getOperand(2)};
3684     return MDNode::get(Context, Elts2);
3685   }
3686   // Create a MDNode <MD, MD, offset 0>
3687   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3688                                     Type::getInt64Ty(Context)))};
3689   return MDNode::get(Context, Elts);
3690 }
3691 
3692 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3693                                       Instruction *&Temp) {
3694   if (Opc != Instruction::BitCast)
3695     return nullptr;
3696 
3697   Temp = nullptr;
3698   Type *SrcTy = V->getType();
3699   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3700       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3701     LLVMContext &Context = V->getContext();
3702 
3703     // We have no information about target data layout, so we assume that
3704     // the maximum pointer size is 64bit.
3705     Type *MidTy = Type::getInt64Ty(Context);
3706     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3707 
3708     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3709   }
3710 
3711   return nullptr;
3712 }
3713 
3714 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3715   if (Opc != Instruction::BitCast)
3716     return nullptr;
3717 
3718   Type *SrcTy = C->getType();
3719   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3720       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3721     LLVMContext &Context = C->getContext();
3722 
3723     // We have no information about target data layout, so we assume that
3724     // the maximum pointer size is 64bit.
3725     Type *MidTy = Type::getInt64Ty(Context);
3726 
3727     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3728                                      DestTy);
3729   }
3730 
3731   return nullptr;
3732 }
3733 
3734 /// Check the debug info version number, if it is out-dated, drop the debug
3735 /// info. Return true if module is modified.
3736 bool llvm::UpgradeDebugInfo(Module &M) {
3737   unsigned Version = getDebugMetadataVersionFromModule(M);
3738   if (Version == DEBUG_METADATA_VERSION) {
3739     bool BrokenDebugInfo = false;
3740     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3741       report_fatal_error("Broken module found, compilation aborted!");
3742     if (!BrokenDebugInfo)
3743       // Everything is ok.
3744       return false;
3745     else {
3746       // Diagnose malformed debug info.
3747       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3748       M.getContext().diagnose(Diag);
3749     }
3750   }
3751   bool Modified = StripDebugInfo(M);
3752   if (Modified && Version != DEBUG_METADATA_VERSION) {
3753     // Diagnose a version mismatch.
3754     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3755     M.getContext().diagnose(DiagVersion);
3756   }
3757   return Modified;
3758 }
3759 
3760 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3761   bool Changed = false;
3762   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3763   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3764   if (ModRetainReleaseMarker) {
3765     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3766     if (Op) {
3767       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3768       if (ID) {
3769         SmallVector<StringRef, 4> ValueComp;
3770         ID->getString().split(ValueComp, "#");
3771         if (ValueComp.size() == 2) {
3772           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3773           ID = MDString::get(M.getContext(), NewValue);
3774         }
3775         M.addModuleFlag(Module::Error, MarkerKey, ID);
3776         M.eraseNamedMetadata(ModRetainReleaseMarker);
3777         Changed = true;
3778       }
3779     }
3780   }
3781   return Changed;
3782 }
3783 
3784 bool llvm::UpgradeModuleFlags(Module &M) {
3785   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3786   if (!ModFlags)
3787     return false;
3788 
3789   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3790   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3791     MDNode *Op = ModFlags->getOperand(I);
3792     if (Op->getNumOperands() != 3)
3793       continue;
3794     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3795     if (!ID)
3796       continue;
3797     if (ID->getString() == "Objective-C Image Info Version")
3798       HasObjCFlag = true;
3799     if (ID->getString() == "Objective-C Class Properties")
3800       HasClassProperties = true;
3801     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3802     // field was Error and now they are Max.
3803     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3804       if (auto *Behavior =
3805               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3806         if (Behavior->getLimitedValue() == Module::Error) {
3807           Type *Int32Ty = Type::getInt32Ty(M.getContext());
3808           Metadata *Ops[3] = {
3809               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3810               MDString::get(M.getContext(), ID->getString()),
3811               Op->getOperand(2)};
3812           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3813           Changed = true;
3814         }
3815       }
3816     }
3817     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3818     // section name so that llvm-lto will not complain about mismatching
3819     // module flags that is functionally the same.
3820     if (ID->getString() == "Objective-C Image Info Section") {
3821       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3822         SmallVector<StringRef, 4> ValueComp;
3823         Value->getString().split(ValueComp, " ");
3824         if (ValueComp.size() != 1) {
3825           std::string NewValue;
3826           for (auto &S : ValueComp)
3827             NewValue += S.str();
3828           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3829                               MDString::get(M.getContext(), NewValue)};
3830           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3831           Changed = true;
3832         }
3833       }
3834     }
3835   }
3836 
  // "Objective-C Class Properties" was recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
3842   if (HasObjCFlag && !HasClassProperties) {
3843     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3844                     (uint32_t)0);
3845     Changed = true;
3846   }
3847 
3848   return Changed;
3849 }
3850 
3851 void llvm::UpgradeSectionAttributes(Module &M) {
3852   auto TrimSpaces = [](StringRef Section) -> std::string {
3853     SmallVector<StringRef, 5> Components;
3854     Section.split(Components, ',');
3855 
3856     SmallString<32> Buffer;
3857     raw_svector_ostream OS(Buffer);
3858 
3859     for (auto Component : Components)
3860       OS << ',' << Component.trim();
3861 
3862     return OS.str().substr(1);
3863   };
3864 
3865   for (auto &GV : M.globals()) {
3866     if (!GV.hasSection())
3867       continue;
3868 
3869     StringRef Section = GV.getSection();
3870 
3871     if (!Section.startswith("__DATA, __objc_catlist"))
3872       continue;
3873 
3874     // __DATA, __objc_catlist, regular, no_dead_strip
3875     // __DATA,__objc_catlist,regular,no_dead_strip
3876     GV.setSection(TrimSpaces(Section));
3877   }
3878 }
3879 
3880 static bool isOldLoopArgument(Metadata *MD) {
3881   auto *T = dyn_cast_or_null<MDTuple>(MD);
3882   if (!T)
3883     return false;
3884   if (T->getNumOperands() < 1)
3885     return false;
3886   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3887   if (!S)
3888     return false;
3889   return S->getString().startswith("llvm.vectorizer.");
3890 }
3891 
3892 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3893   StringRef OldPrefix = "llvm.vectorizer.";
3894   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3895 
3896   if (OldTag == "llvm.vectorizer.unroll")
3897     return MDString::get(C, "llvm.loop.interleave.count");
3898 
3899   return MDString::get(
3900       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3901              .str());
3902 }
3903 
3904 static Metadata *upgradeLoopArgument(Metadata *MD) {
3905   auto *T = dyn_cast_or_null<MDTuple>(MD);
3906   if (!T)
3907     return MD;
3908   if (T->getNumOperands() < 1)
3909     return MD;
3910   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3911   if (!OldTag)
3912     return MD;
3913   if (!OldTag->getString().startswith("llvm.vectorizer."))
3914     return MD;
3915 
3916   // This has an old tag.  Upgrade it.
3917   SmallVector<Metadata *, 8> Ops;
3918   Ops.reserve(T->getNumOperands());
3919   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3920   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3921     Ops.push_back(T->getOperand(I));
3922 
3923   return MDTuple::get(T->getContext(), Ops);
3924 }
3925 
3926 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3927   auto *T = dyn_cast<MDTuple>(&N);
3928   if (!T)
3929     return &N;
3930 
3931   if (none_of(T->operands(), isOldLoopArgument))
3932     return &N;
3933 
3934   SmallVector<Metadata *, 8> Ops;
3935   Ops.reserve(T->getNumOperands());
3936   for (Metadata *MD : T->operands())
3937     Ops.push_back(upgradeLoopArgument(MD));
3938 
3939   return MDTuple::get(T->getContext(), Ops);
3940 }
3941