1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
30 #include <cstring>
31 using namespace llvm;
32 
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
34 
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
38                                   Function *&NewFn) {
39   // Check whether this is an old version of the function, which received
40   // v4f32 arguments.
41   Type *Arg0Type = F->getFunctionType()->getParamType(0);
42   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43     return false;
44 
45   // Yes, it's old, replace it with new version.
46   rename(F);
47   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
48   return true;
49 }
50 
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54                                              Function *&NewFn) {
55   // Check that the last argument is an i32.
56   Type *LastArgType = F->getFunctionType()->getParamType(
57      F->getFunctionType()->getNumParams() - 1);
58   if (!LastArgType->isIntegerTy(32))
59     return false;
60 
61   // Move this function aside and map down.
62   rename(F);
63   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
64   return true;
65 }
66 
67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
68   // All of the intrinsics matches below should be marked with which llvm
69   // version started autoupgrading them. At some point in the future we would
70   // like to use this information to remove upgrade code for some older
71   // intrinsics. It is currently undecided how we will determine that future
72   // point.
73   if (Name == "addcarryx.u32" || // Added in 8.0
74       Name == "addcarryx.u64" || // Added in 8.0
75       Name == "addcarry.u32" || // Added in 8.0
76       Name == "addcarry.u64" || // Added in 8.0
77       Name == "subborrow.u32" || // Added in 8.0
78       Name == "subborrow.u64" || // Added in 8.0
79       Name.startswith("sse2.padds.") || // Added in 8.0
80       Name.startswith("sse2.psubs.") || // Added in 8.0
81       Name.startswith("sse2.paddus.") || // Added in 8.0
82       Name.startswith("sse2.psubus.") || // Added in 8.0
83       Name.startswith("avx2.padds.") || // Added in 8.0
84       Name.startswith("avx2.psubs.") || // Added in 8.0
85       Name.startswith("avx2.paddus.") || // Added in 8.0
86       Name.startswith("avx2.psubus.") || // Added in 8.0
87       Name.startswith("avx512.padds.") || // Added in 8.0
88       Name.startswith("avx512.psubs.") || // Added in 8.0
89       Name.startswith("avx512.mask.padds.") || // Added in 8.0
90       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
91       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
92       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
93       Name=="ssse3.pabs.b.128" || // Added in 6.0
94       Name=="ssse3.pabs.w.128" || // Added in 6.0
95       Name=="ssse3.pabs.d.128" || // Added in 6.0
96       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
97       Name.startswith("fma.vfmadd.") || // Added in 7.0
98       Name.startswith("fma.vfmsub.") || // Added in 7.0
99       Name.startswith("fma.vfmaddsub.") || // Added in 7.0
100       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
101       Name.startswith("fma.vfnmadd.") || // Added in 7.0
102       Name.startswith("fma.vfnmsub.") || // Added in 7.0
103       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
104       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
105       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
106       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
107       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
108       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
109       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
110       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
111       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
112       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
113       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
114       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
115       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
116       Name.startswith("avx512.kunpck") || //added in 6.0
117       Name.startswith("avx2.pabs.") || // Added in 6.0
118       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
119       Name.startswith("avx512.broadcastm") || // Added in 6.0
120       Name == "sse.sqrt.ss" || // Added in 7.0
121       Name == "sse2.sqrt.sd" || // Added in 7.0
122       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
123       Name.startswith("avx.sqrt.p") || // Added in 7.0
124       Name.startswith("sse2.sqrt.p") || // Added in 7.0
125       Name.startswith("sse.sqrt.p") || // Added in 7.0
126       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
127       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
128       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
129       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
130       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
131       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
132       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
133       Name.startswith("avx.vperm2f128.") || // Added in 6.0
134       Name == "avx2.vperm2i128" || // Added in 6.0
135       Name == "sse.add.ss" || // Added in 4.0
136       Name == "sse2.add.sd" || // Added in 4.0
137       Name == "sse.sub.ss" || // Added in 4.0
138       Name == "sse2.sub.sd" || // Added in 4.0
139       Name == "sse.mul.ss" || // Added in 4.0
140       Name == "sse2.mul.sd" || // Added in 4.0
141       Name == "sse.div.ss" || // Added in 4.0
142       Name == "sse2.div.sd" || // Added in 4.0
143       Name == "sse41.pmaxsb" || // Added in 3.9
144       Name == "sse2.pmaxs.w" || // Added in 3.9
145       Name == "sse41.pmaxsd" || // Added in 3.9
146       Name == "sse2.pmaxu.b" || // Added in 3.9
147       Name == "sse41.pmaxuw" || // Added in 3.9
148       Name == "sse41.pmaxud" || // Added in 3.9
149       Name == "sse41.pminsb" || // Added in 3.9
150       Name == "sse2.pmins.w" || // Added in 3.9
151       Name == "sse41.pminsd" || // Added in 3.9
152       Name == "sse2.pminu.b" || // Added in 3.9
153       Name == "sse41.pminuw" || // Added in 3.9
154       Name == "sse41.pminud" || // Added in 3.9
155       Name == "avx512.kand.w" || // Added in 7.0
156       Name == "avx512.kandn.w" || // Added in 7.0
157       Name == "avx512.knot.w" || // Added in 7.0
158       Name == "avx512.kor.w" || // Added in 7.0
159       Name == "avx512.kxor.w" || // Added in 7.0
160       Name == "avx512.kxnor.w" || // Added in 7.0
161       Name == "avx512.kortestc.w" || // Added in 7.0
162       Name == "avx512.kortestz.w" || // Added in 7.0
163       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
164       Name.startswith("avx2.pmax") || // Added in 3.9
165       Name.startswith("avx2.pmin") || // Added in 3.9
166       Name.startswith("avx512.mask.pmax") || // Added in 4.0
167       Name.startswith("avx512.mask.pmin") || // Added in 4.0
168       Name.startswith("avx2.vbroadcast") || // Added in 3.8
169       Name.startswith("avx2.pbroadcast") || // Added in 3.8
170       Name.startswith("avx.vpermil.") || // Added in 3.1
171       Name.startswith("sse2.pshuf") || // Added in 3.9
172       Name.startswith("avx512.pbroadcast") || // Added in 3.9
173       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
174       Name.startswith("avx512.mask.movddup") || // Added in 3.9
175       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
176       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
177       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
178       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
179       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
180       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
181       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
182       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
183       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
184       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
185       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
186       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
187       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
188       Name.startswith("avx512.mask.pand.") || // Added in 3.9
189       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
190       Name.startswith("avx512.mask.por.") || // Added in 3.9
191       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
192       Name.startswith("avx512.mask.and.") || // Added in 3.9
193       Name.startswith("avx512.mask.andn.") || // Added in 3.9
194       Name.startswith("avx512.mask.or.") || // Added in 3.9
195       Name.startswith("avx512.mask.xor.") || // Added in 3.9
196       Name.startswith("avx512.mask.padd.") || // Added in 4.0
197       Name.startswith("avx512.mask.psub.") || // Added in 4.0
198       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
199       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
200       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
201       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
202       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
203       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
204       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
205       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
206       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
207       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
208       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
209       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
210       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
211       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
212       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
213       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
214       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
215       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
216       Name == "avx512.cvtusi2sd" || // Added in 7.0
217       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
218       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
219       Name == "sse2.pmulu.dq" || // Added in 7.0
220       Name == "sse41.pmuldq" || // Added in 7.0
221       Name == "avx2.pmulu.dq" || // Added in 7.0
222       Name == "avx2.pmul.dq" || // Added in 7.0
223       Name == "avx512.pmulu.dq.512" || // Added in 7.0
224       Name == "avx512.pmul.dq.512" || // Added in 7.0
225       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
226       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
227       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
228       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
229       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
230       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
231       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
232       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
233       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
234       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
235       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
236       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
237       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
238       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
239       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
240       Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
241       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
242       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
243       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
244       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
245       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
246       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
247       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
248       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
249       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
250       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
251       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
252       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
253       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
254       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
255       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
256       Name.startswith("avx512.mask.pslli") || // Added in 4.0
257       Name.startswith("avx512.mask.psrai") || // Added in 4.0
258       Name.startswith("avx512.mask.psrli") || // Added in 4.0
259       Name.startswith("avx512.mask.psllv") || // Added in 4.0
260       Name.startswith("avx512.mask.psrav") || // Added in 4.0
261       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
262       Name.startswith("sse41.pmovsx") || // Added in 3.8
263       Name.startswith("sse41.pmovzx") || // Added in 3.9
264       Name.startswith("avx2.pmovsx") || // Added in 3.9
265       Name.startswith("avx2.pmovzx") || // Added in 3.9
266       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
267       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
268       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
269       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
270       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
271       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
272       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
273       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
274       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
275       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
276       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
277       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
278       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
279       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
280       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
281       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
282       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
283       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
284       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
285       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
286       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
287       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
288       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
289       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
290       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
291       Name.startswith("avx512.vpshld.") || // Added in 8.0
292       Name.startswith("avx512.vpshrd.") || // Added in 8.0
293       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
294       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
295       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
296       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
297       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
298       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
299       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
300       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
301       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
302       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
303       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
304       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
305       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
306       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
307       Name == "sse.cvtsi2ss" || // Added in 7.0
308       Name == "sse.cvtsi642ss" || // Added in 7.0
309       Name == "sse2.cvtsi2sd" || // Added in 7.0
310       Name == "sse2.cvtsi642sd" || // Added in 7.0
311       Name == "sse2.cvtss2sd" || // Added in 7.0
312       Name == "sse2.cvtdq2pd" || // Added in 3.9
313       Name == "sse2.cvtdq2ps" || // Added in 7.0
314       Name == "sse2.cvtps2pd" || // Added in 3.9
315       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
316       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
317       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
318       Name.startswith("avx.vinsertf128.") || // Added in 3.7
319       Name == "avx2.vinserti128" || // Added in 3.7
320       Name.startswith("avx512.mask.insert") || // Added in 4.0
321       Name.startswith("avx.vextractf128.") || // Added in 3.7
322       Name == "avx2.vextracti128" || // Added in 3.7
323       Name.startswith("avx512.mask.vextract") || // Added in 4.0
324       Name.startswith("sse4a.movnt.") || // Added in 3.9
325       Name.startswith("avx.movnt.") || // Added in 3.2
326       Name.startswith("avx512.storent.") || // Added in 3.9
327       Name == "sse41.movntdqa" || // Added in 5.0
328       Name == "avx2.movntdqa" || // Added in 5.0
329       Name == "avx512.movntdqa" || // Added in 5.0
330       Name == "sse2.storel.dq" || // Added in 3.9
331       Name.startswith("sse.storeu.") || // Added in 3.9
332       Name.startswith("sse2.storeu.") || // Added in 3.9
333       Name.startswith("avx.storeu.") || // Added in 3.9
334       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
335       Name.startswith("avx512.mask.store.p") || // Added in 3.9
336       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
337       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
338       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
339       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
340       Name == "avx512.mask.store.ss" || // Added in 7.0
341       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
342       Name.startswith("avx512.mask.load.") || // Added in 3.9
343       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
344       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
345       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
346       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
347       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
348       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
349       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
350       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
351       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
352       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
353       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
354       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
355       Name == "sse42.crc32.64.8" || // Added in 3.4
356       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
357       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
358       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
359       Name.startswith("avx512.mask.valign.") || // Added in 4.0
360       Name.startswith("sse2.psll.dq") || // Added in 3.7
361       Name.startswith("sse2.psrl.dq") || // Added in 3.7
362       Name.startswith("avx2.psll.dq") || // Added in 3.7
363       Name.startswith("avx2.psrl.dq") || // Added in 3.7
364       Name.startswith("avx512.psll.dq") || // Added in 3.9
365       Name.startswith("avx512.psrl.dq") || // Added in 3.9
366       Name == "sse41.pblendw" || // Added in 3.7
367       Name.startswith("sse41.blendp") || // Added in 3.7
368       Name.startswith("avx.blend.p") || // Added in 3.7
369       Name == "avx2.pblendw" || // Added in 3.7
370       Name.startswith("avx2.pblendd.") || // Added in 3.7
371       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
372       Name == "avx2.vbroadcasti128" || // Added in 3.7
373       Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
374       Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
375       Name == "xop.vpcmov" || // Added in 3.8
376       Name == "xop.vpcmov.256" || // Added in 5.0
377       Name.startswith("avx512.mask.move.s") || // Added in 4.0
378       Name.startswith("avx512.cvtmask2") || // Added in 5.0
379       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
380       Name.startswith("xop.vprot") || // Added in 8.0
381       Name.startswith("avx512.prol") || // Added in 8.0
382       Name.startswith("avx512.pror") || // Added in 8.0
383       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
384       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
385       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
386       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
387       Name.startswith("avx512.ptestm") || //Added in 6.0
388       Name.startswith("avx512.ptestnm") || //Added in 6.0
389       Name.startswith("sse2.pavg") || // Added in 6.0
390       Name.startswith("avx2.pavg") || // Added in 6.0
391       Name.startswith("avx512.mask.pavg")) // Added in 6.0
392     return true;
393 
394   return false;
395 }
396 
397 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
398                                         Function *&NewFn) {
399   // Only handle intrinsics that start with "x86.".
400   if (!Name.startswith("x86."))
401     return false;
402   // Remove "x86." prefix.
403   Name = Name.substr(4);
404 
405   if (ShouldUpgradeX86Intrinsic(F, Name)) {
406     NewFn = nullptr;
407     return true;
408   }
409 
410   if (Name == "rdtscp") { // Added in 8.0
411     // If this intrinsic has 0 operands, it's the new version.
412     if (F->getFunctionType()->getNumParams() == 0)
413       return false;
414 
415     rename(F);
416     NewFn = Intrinsic::getDeclaration(F->getParent(),
417                                       Intrinsic::x86_rdtscp);
418     return true;
419   }
420 
421   // SSE4.1 ptest functions may have an old signature.
422   if (Name.startswith("sse41.ptest")) { // Added in 3.2
423     if (Name.substr(11) == "c")
424       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
425     if (Name.substr(11) == "z")
426       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
427     if (Name.substr(11) == "nzc")
428       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
429   }
430   // Several blend and other instructions with masks used the wrong number of
431   // bits.
432   if (Name == "sse41.insertps") // Added in 3.6
433     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
434                                             NewFn);
435   if (Name == "sse41.dppd") // Added in 3.6
436     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
437                                             NewFn);
438   if (Name == "sse41.dpps") // Added in 3.6
439     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
440                                             NewFn);
441   if (Name == "sse41.mpsadbw") // Added in 3.6
442     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
443                                             NewFn);
444   if (Name == "avx.dp.ps.256") // Added in 3.6
445     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
446                                             NewFn);
447   if (Name == "avx2.mpsadbw") // Added in 3.6
448     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
449                                             NewFn);
450 
451   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
452   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
453     rename(F);
454     NewFn = Intrinsic::getDeclaration(F->getParent(),
455                                       Intrinsic::x86_xop_vfrcz_ss);
456     return true;
457   }
458   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
459     rename(F);
460     NewFn = Intrinsic::getDeclaration(F->getParent(),
461                                       Intrinsic::x86_xop_vfrcz_sd);
462     return true;
463   }
464   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
465   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
466     auto Idx = F->getFunctionType()->getParamType(2);
467     if (Idx->isFPOrFPVectorTy()) {
468       rename(F);
469       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
470       unsigned EltSize = Idx->getScalarSizeInBits();
471       Intrinsic::ID Permil2ID;
472       if (EltSize == 64 && IdxSize == 128)
473         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
474       else if (EltSize == 32 && IdxSize == 128)
475         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
476       else if (EltSize == 64 && IdxSize == 256)
477         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
478       else
479         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
480       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
481       return true;
482     }
483   }
484 
485   if (Name == "seh.recoverfp") {
486     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
487     return true;
488   }
489 
490   return false;
491 }
492 
493 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
494   assert(F && "Illegal to upgrade a non-existent Function.");
495 
496   // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
497   if (F->getName() == "clang.arc.use") {
498     NewFn = nullptr;
499     return true;
500   }
501 
502   // Quickly eliminate it, if it's not a candidate.
503   StringRef Name = F->getName();
504   if (Name.size() <= 8 || !Name.startswith("llvm."))
505     return false;
506   Name = Name.substr(5); // Strip off "llvm."
507 
508   switch (Name[0]) {
509   default: break;
510   case 'a': {
511     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
512       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
513                                         F->arg_begin()->getType());
514       return true;
515     }
516     if (Name.startswith("arm.neon.vclz")) {
517       Type* args[2] = {
518         F->arg_begin()->getType(),
519         Type::getInt1Ty(F->getContext())
520       };
521       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
522       // the end of the name. Change name from llvm.arm.neon.vclz.* to
523       //  llvm.ctlz.*
524       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
525       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
526                                "llvm.ctlz." + Name.substr(14), F->getParent());
527       return true;
528     }
529     if (Name.startswith("arm.neon.vcnt")) {
530       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
531                                         F->arg_begin()->getType());
532       return true;
533     }
534     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
535     if (vldRegex.match(Name)) {
536       auto fArgs = F->getFunctionType()->params();
537       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
538       // Can't use Intrinsic::getDeclaration here as the return types might
539       // then only be structurally equal.
540       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
541       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
542                                "llvm." + Name + ".p0i8", F->getParent());
543       return true;
544     }
545     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
546     if (vstRegex.match(Name)) {
547       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
548                                                 Intrinsic::arm_neon_vst2,
549                                                 Intrinsic::arm_neon_vst3,
550                                                 Intrinsic::arm_neon_vst4};
551 
552       static const Intrinsic::ID StoreLaneInts[] = {
553         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
554         Intrinsic::arm_neon_vst4lane
555       };
556 
557       auto fArgs = F->getFunctionType()->params();
558       Type *Tys[] = {fArgs[0], fArgs[1]};
559       if (Name.find("lane") == StringRef::npos)
560         NewFn = Intrinsic::getDeclaration(F->getParent(),
561                                           StoreInts[fArgs.size() - 3], Tys);
562       else
563         NewFn = Intrinsic::getDeclaration(F->getParent(),
564                                           StoreLaneInts[fArgs.size() - 5], Tys);
565       return true;
566     }
567     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
568       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
569       return true;
570     }
571     break;
572   }
573 
574   case 'c': {
575     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
576       rename(F);
577       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
578                                         F->arg_begin()->getType());
579       return true;
580     }
581     if (Name.startswith("cttz.") && F->arg_size() == 1) {
582       rename(F);
583       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
584                                         F->arg_begin()->getType());
585       return true;
586     }
587     break;
588   }
589   case 'd': {
590     if (Name == "dbg.value" && F->arg_size() == 4) {
591       rename(F);
592       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
593       return true;
594     }
595     break;
596   }
597   case 'i':
598   case 'l': {
599     bool IsLifetimeStart = Name.startswith("lifetime.start");
600     if (IsLifetimeStart || Name.startswith("invariant.start")) {
601       Intrinsic::ID ID = IsLifetimeStart ?
602         Intrinsic::lifetime_start : Intrinsic::invariant_start;
603       auto Args = F->getFunctionType()->params();
604       Type* ObjectPtr[1] = {Args[1]};
605       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
606         rename(F);
607         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
608         return true;
609       }
610     }
611 
612     bool IsLifetimeEnd = Name.startswith("lifetime.end");
613     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
614       Intrinsic::ID ID = IsLifetimeEnd ?
615         Intrinsic::lifetime_end : Intrinsic::invariant_end;
616 
617       auto Args = F->getFunctionType()->params();
618       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
619       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
620         rename(F);
621         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
622         return true;
623       }
624     }
625     if (Name.startswith("invariant.group.barrier")) {
626       // Rename invariant.group.barrier to launder.invariant.group
627       auto Args = F->getFunctionType()->params();
628       Type* ObjectPtr[1] = {Args[0]};
629       rename(F);
630       NewFn = Intrinsic::getDeclaration(F->getParent(),
631           Intrinsic::launder_invariant_group, ObjectPtr);
632       return true;
633 
634     }
635 
636     break;
637   }
638   case 'm': {
639     if (Name.startswith("masked.load.")) {
640       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
641       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
642         rename(F);
643         NewFn = Intrinsic::getDeclaration(F->getParent(),
644                                           Intrinsic::masked_load,
645                                           Tys);
646         return true;
647       }
648     }
649     if (Name.startswith("masked.store.")) {
650       auto Args = F->getFunctionType()->params();
651       Type *Tys[] = { Args[0], Args[1] };
652       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
653         rename(F);
654         NewFn = Intrinsic::getDeclaration(F->getParent(),
655                                           Intrinsic::masked_store,
656                                           Tys);
657         return true;
658       }
659     }
660     // Renaming gather/scatter intrinsics with no address space overloading
661     // to the new overload which includes an address space
662     if (Name.startswith("masked.gather.")) {
663       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
664       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
665         rename(F);
666         NewFn = Intrinsic::getDeclaration(F->getParent(),
667                                           Intrinsic::masked_gather, Tys);
668         return true;
669       }
670     }
671     if (Name.startswith("masked.scatter.")) {
672       auto Args = F->getFunctionType()->params();
673       Type *Tys[] = {Args[0], Args[1]};
674       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
675         rename(F);
676         NewFn = Intrinsic::getDeclaration(F->getParent(),
677                                           Intrinsic::masked_scatter, Tys);
678         return true;
679       }
680     }
681     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
682     // alignment parameter to embedding the alignment as an attribute of
683     // the pointer args.
684     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
685       rename(F);
686       // Get the types of dest, src, and len
687       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
688       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
689                                         ParamTypes);
690       return true;
691     }
692     if (Name.startswith("memmove.") && F->arg_size() == 5) {
693       rename(F);
694       // Get the types of dest, src, and len
695       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
696       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
697                                         ParamTypes);
698       return true;
699     }
700     if (Name.startswith("memset.") && F->arg_size() == 5) {
701       rename(F);
702       // Get the types of dest, and len
703       const auto *FT = F->getFunctionType();
704       Type *ParamTypes[2] = {
705           FT->getParamType(0), // Dest
706           FT->getParamType(2)  // len
707       };
708       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
709                                         ParamTypes);
710       return true;
711     }
712     break;
713   }
714   case 'n': {
715     if (Name.startswith("nvvm.")) {
716       Name = Name.substr(5);
717 
718       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
719       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
720                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
721                               .Case("clz.i", Intrinsic::ctlz)
722                               .Case("popc.i", Intrinsic::ctpop)
723                               .Default(Intrinsic::not_intrinsic);
724       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
725         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
726                                           {F->getReturnType()});
727         return true;
728       }
729 
730       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
731       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
732       //
733       // TODO: We could add lohi.i2d.
734       bool Expand = StringSwitch<bool>(Name)
735                         .Cases("abs.i", "abs.ll", true)
736                         .Cases("clz.ll", "popc.ll", "h2f", true)
737                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
738                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
739                         .Default(false);
740       if (Expand) {
741         NewFn = nullptr;
742         return true;
743       }
744     }
745     break;
746   }
747   case 'o':
748     // We only need to change the name to match the mangling including the
749     // address space.
750     if (Name.startswith("objectsize.")) {
751       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
752       if (F->arg_size() == 2 || F->arg_size() == 3 ||
753           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
754         rename(F);
755         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
756                                           Tys);
757         return true;
758       }
759     }
760     break;
761 
762   case 's':
763     if (Name == "stackprotectorcheck") {
764       NewFn = nullptr;
765       return true;
766     }
767     break;
768 
769   case 'x':
770     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
771       return true;
772   }
773   // Remangle our intrinsic since we upgrade the mangling
774   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
775   if (Result != None) {
776     NewFn = Result.getValue();
777     return true;
778   }
779 
780   //  This may not belong here. This function is effectively being overloaded
781   //  to both detect an intrinsic which needs upgrading, and to provide the
782   //  upgraded form of the intrinsic. We should perhaps have two separate
783   //  functions for this.
784   return false;
785 }
786 
787 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
788   NewFn = nullptr;
789   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
790   assert(F != NewFn && "Intrinsic function upgraded to the same function");
791 
792   // Upgrade intrinsic attributes.  This does not change the function.
793   if (NewFn)
794     F = NewFn;
795   if (Intrinsic::ID id = F->getIntrinsicID())
796     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
797   return Upgraded;
798 }
799 
// Hook for upgrading a global variable.  No upgrades are implemented at
// present: GV is not inspected and the function always returns false.
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
  return false;
}
804 
805 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
806 // to byte shuffles.
807 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
808                                          Value *Op, unsigned Shift) {
809   Type *ResultTy = Op->getType();
810   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
811 
812   // Bitcast from a 64-bit element type to a byte element type.
813   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
814   Op = Builder.CreateBitCast(Op, VecTy, "cast");
815 
816   // We'll be shuffling in zeroes.
817   Value *Res = Constant::getNullValue(VecTy);
818 
819   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
820   // we'll just return the zero vector.
821   if (Shift < 16) {
822     uint32_t Idxs[64];
823     // 256/512-bit version is split into 2/4 16-byte lanes.
824     for (unsigned l = 0; l != NumElts; l += 16)
825       for (unsigned i = 0; i != 16; ++i) {
826         unsigned Idx = NumElts + i - Shift;
827         if (Idx < NumElts)
828           Idx -= NumElts - 16; // end of lane, switch operand.
829         Idxs[l + i] = Idx + l;
830       }
831 
832     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
833   }
834 
835   // Bitcast back to a 64-bit element type.
836   return Builder.CreateBitCast(Res, ResultTy, "cast");
837 }
838 
839 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
840 // to byte shuffles.
841 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
842                                          unsigned Shift) {
843   Type *ResultTy = Op->getType();
844   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
845 
846   // Bitcast from a 64-bit element type to a byte element type.
847   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
848   Op = Builder.CreateBitCast(Op, VecTy, "cast");
849 
850   // We'll be shuffling in zeroes.
851   Value *Res = Constant::getNullValue(VecTy);
852 
853   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
854   // we'll just return the zero vector.
855   if (Shift < 16) {
856     uint32_t Idxs[64];
857     // 256/512-bit version is split into 2/4 16-byte lanes.
858     for (unsigned l = 0; l != NumElts; l += 16)
859       for (unsigned i = 0; i != 16; ++i) {
860         unsigned Idx = i + Shift;
861         if (Idx >= 16)
862           Idx += NumElts - 16; // end of lane, switch operand.
863         Idxs[l + i] = Idx + l;
864       }
865 
866     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
867   }
868 
869   // Bitcast back to a 64-bit element type.
870   return Builder.CreateBitCast(Res, ResultTy, "cast");
871 }
872 
873 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
874                             unsigned NumElts) {
875   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
876                              cast<IntegerType>(Mask->getType())->getBitWidth());
877   Mask = Builder.CreateBitCast(Mask, MaskTy);
878 
879   // If we have less than 8 elements, then the starting mask was an i8 and
880   // we need to extract down to the right number of elements.
881   if (NumElts < 8) {
882     uint32_t Indices[4];
883     for (unsigned i = 0; i != NumElts; ++i)
884       Indices[i] = i;
885     Mask = Builder.CreateShuffleVector(Mask, Mask,
886                                        makeArrayRef(Indices, NumElts),
887                                        "extract");
888   }
889 
890   return Mask;
891 }
892 
893 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
894                             Value *Op0, Value *Op1) {
895   // If the mask is all ones just emit the first operation.
896   if (const auto *C = dyn_cast<Constant>(Mask))
897     if (C->isAllOnesValue())
898       return Op0;
899 
900   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
901   return Builder.CreateSelect(Mask, Op0, Op1);
902 }
903 
904 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
905                                   Value *Op0, Value *Op1) {
906   // If the mask is all ones just emit the first operation.
907   if (const auto *C = dyn_cast<Constant>(Mask))
908     if (C->isAllOnesValue())
909       return Op0;
910 
911   llvm::VectorType *MaskTy =
912     llvm::VectorType::get(Builder.getInt1Ty(),
913                           Mask->getType()->getIntegerBitWidth());
914   Mask = Builder.CreateBitCast(Mask, MaskTy);
915   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
916   return Builder.CreateSelect(Mask, Op0, Op1);
917 }
918 
919 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
920 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
921 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
922 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
923                                         Value *Op1, Value *Shift,
924                                         Value *Passthru, Value *Mask,
925                                         bool IsVALIGN) {
926   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
927 
928   unsigned NumElts = Op0->getType()->getVectorNumElements();
929   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
930   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
931   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
932 
933   // Mask the immediate for VALIGN.
934   if (IsVALIGN)
935     ShiftVal &= (NumElts - 1);
936 
937   // If palignr is shifting the pair of vectors more than the size of two
938   // lanes, emit zero.
939   if (ShiftVal >= 32)
940     return llvm::Constant::getNullValue(Op0->getType());
941 
942   // If palignr is shifting the pair of input vectors more than one lane,
943   // but less than two lanes, convert to shifting in zeroes.
944   if (ShiftVal > 16) {
945     ShiftVal -= 16;
946     Op1 = Op0;
947     Op0 = llvm::Constant::getNullValue(Op0->getType());
948   }
949 
950   uint32_t Indices[64];
951   // 256-bit palignr operates on 128-bit lanes so we need to handle that
952   for (unsigned l = 0; l < NumElts; l += 16) {
953     for (unsigned i = 0; i != 16; ++i) {
954       unsigned Idx = ShiftVal + i;
955       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
956         Idx += NumElts - 16; // End of lane, switch operand.
957       Indices[l + i] = Idx + l;
958     }
959   }
960 
961   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
962                                              makeArrayRef(Indices, NumElts),
963                                              "palignr");
964 
965   return EmitX86Select(Builder, Mask, Align, Passthru);
966 }
967 
968 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
969                                           bool ZeroMask, bool IndexForm) {
970   Type *Ty = CI.getType();
971   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
972   unsigned EltWidth = Ty->getScalarSizeInBits();
973   bool IsFloat = Ty->isFPOrFPVectorTy();
974   Intrinsic::ID IID;
975   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
976     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
977   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
978     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
979   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
980     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
981   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
982     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
983   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
984     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
985   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
986     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
987   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
988     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
989   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
990     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
991   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
992     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
993   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
994     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
995   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
996     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
997   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
998     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
999   else if (VecWidth == 128 && EltWidth == 16)
1000     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1001   else if (VecWidth == 256 && EltWidth == 16)
1002     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1003   else if (VecWidth == 512 && EltWidth == 16)
1004     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1005   else if (VecWidth == 128 && EltWidth == 8)
1006     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1007   else if (VecWidth == 256 && EltWidth == 8)
1008     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1009   else if (VecWidth == 512 && EltWidth == 8)
1010     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1011   else
1012     llvm_unreachable("Unexpected intrinsic");
1013 
1014   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1015                     CI.getArgOperand(2) };
1016 
1017   // If this isn't index form we need to swap operand 0 and 1.
1018   if (!IndexForm)
1019     std::swap(Args[0], Args[1]);
1020 
1021   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1022                                 Args);
1023   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1024                              : Builder.CreateBitCast(CI.getArgOperand(1),
1025                                                      Ty);
1026   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1027 }
1028 
1029 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1030                                             bool IsSigned, bool IsAddition) {
1031   Type *Ty = CI.getType();
1032   Value *Op0 = CI.getOperand(0);
1033   Value *Op1 = CI.getOperand(1);
1034 
1035   Intrinsic::ID IID =
1036       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1037                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1038   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1039   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1040 
1041   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1042     Value *VecSrc = CI.getOperand(2);
1043     Value *Mask = CI.getOperand(3);
1044     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1045   }
1046   return Res;
1047 }
1048 
1049 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1050                                bool IsRotateRight) {
1051   Type *Ty = CI.getType();
1052   Value *Src = CI.getArgOperand(0);
1053   Value *Amt = CI.getArgOperand(1);
1054 
1055   // Amount may be scalar immediate, in which case create a splat vector.
1056   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1057   // we only care about the lowest log2 bits anyway.
1058   if (Amt->getType() != Ty) {
1059     unsigned NumElts = Ty->getVectorNumElements();
1060     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1061     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1062   }
1063 
1064   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1065   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1066   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1067 
1068   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1069     Value *VecSrc = CI.getOperand(2);
1070     Value *Mask = CI.getOperand(3);
1071     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1072   }
1073   return Res;
1074 }
1075 
1076 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1077                               bool IsSigned) {
1078   Type *Ty = CI.getType();
1079   Value *LHS = CI.getArgOperand(0);
1080   Value *RHS = CI.getArgOperand(1);
1081 
1082   CmpInst::Predicate Pred;
1083   switch (Imm) {
1084   case 0x0:
1085     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1086     break;
1087   case 0x1:
1088     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1089     break;
1090   case 0x2:
1091     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1092     break;
1093   case 0x3:
1094     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1095     break;
1096   case 0x4:
1097     Pred = ICmpInst::ICMP_EQ;
1098     break;
1099   case 0x5:
1100     Pred = ICmpInst::ICMP_NE;
1101     break;
1102   case 0x6:
1103     return Constant::getNullValue(Ty); // FALSE
1104   case 0x7:
1105     return Constant::getAllOnesValue(Ty); // TRUE
1106   default:
1107     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1108   }
1109 
1110   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1111   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1112   return Ext;
1113 }
1114 
1115 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1116                                     bool IsShiftRight, bool ZeroMask) {
1117   Type *Ty = CI.getType();
1118   Value *Op0 = CI.getArgOperand(0);
1119   Value *Op1 = CI.getArgOperand(1);
1120   Value *Amt = CI.getArgOperand(2);
1121 
1122   if (IsShiftRight)
1123     std::swap(Op0, Op1);
1124 
1125   // Amount may be scalar immediate, in which case create a splat vector.
1126   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1127   // we only care about the lowest log2 bits anyway.
1128   if (Amt->getType() != Ty) {
1129     unsigned NumElts = Ty->getVectorNumElements();
1130     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1131     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1132   }
1133 
1134   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1135   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1136   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1137 
1138   unsigned NumArgs = CI.getNumArgOperands();
1139   if (NumArgs >= 4) { // For masked intrinsics.
1140     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1141                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1142                                    CI.getArgOperand(0);
1143     Value *Mask = CI.getOperand(NumArgs - 1);
1144     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1145   }
1146   return Res;
1147 }
1148 
1149 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1150                                  Value *Ptr, Value *Data, Value *Mask,
1151                                  bool Aligned) {
1152   // Cast the pointer to the right type.
1153   Ptr = Builder.CreateBitCast(Ptr,
1154                               llvm::PointerType::getUnqual(Data->getType()));
1155   unsigned Align =
1156     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1157 
1158   // If the mask is all ones just emit a regular store.
1159   if (const auto *C = dyn_cast<Constant>(Mask))
1160     if (C->isAllOnesValue())
1161       return Builder.CreateAlignedStore(Data, Ptr, Align);
1162 
1163   // Convert the mask from an integer type to a vector of i1.
1164   unsigned NumElts = Data->getType()->getVectorNumElements();
1165   Mask = getX86MaskVec(Builder, Mask, NumElts);
1166   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1167 }
1168 
1169 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1170                                 Value *Ptr, Value *Passthru, Value *Mask,
1171                                 bool Aligned) {
1172   Type *ValTy = Passthru->getType();
1173   // Cast the pointer to the right type.
1174   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1175   unsigned Align =
1176     Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1177 
1178   // If the mask is all ones just emit a regular store.
1179   if (const auto *C = dyn_cast<Constant>(Mask))
1180     if (C->isAllOnesValue())
1181       return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
1182 
1183   // Convert the mask from an integer type to a vector of i1.
1184   unsigned NumElts = Passthru->getType()->getVectorNumElements();
1185   Mask = getX86MaskVec(Builder, Mask, NumElts);
1186   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1187 }
1188 
1189 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1190   Value *Op0 = CI.getArgOperand(0);
1191   llvm::Type *Ty = Op0->getType();
1192   Value *Zero = llvm::Constant::getNullValue(Ty);
1193   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1194   Value *Neg = Builder.CreateNeg(Op0);
1195   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1196 
1197   if (CI.getNumArgOperands() == 3)
1198     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1199 
1200   return Res;
1201 }
1202 
1203 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1204                                ICmpInst::Predicate Pred) {
1205   Value *Op0 = CI.getArgOperand(0);
1206   Value *Op1 = CI.getArgOperand(1);
1207   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1208   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1209 
1210   if (CI.getNumArgOperands() == 4)
1211     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1212 
1213   return Res;
1214 }
1215 
1216 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1217   Type *Ty = CI.getType();
1218 
1219   // Arguments have a vXi32 type so cast to vXi64.
1220   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1221   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1222 
1223   if (IsSigned) {
1224     // Shift left then arithmetic shift right.
1225     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1226     LHS = Builder.CreateShl(LHS, ShiftAmt);
1227     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1228     RHS = Builder.CreateShl(RHS, ShiftAmt);
1229     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1230   } else {
1231     // Clear the upper bits.
1232     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1233     LHS = Builder.CreateAnd(LHS, Mask);
1234     RHS = Builder.CreateAnd(RHS, Mask);
1235   }
1236 
1237   Value *Res = Builder.CreateMul(LHS, RHS);
1238 
1239   if (CI.getNumArgOperands() == 4)
1240     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1241 
1242   return Res;
1243 }
1244 
1245 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1246 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1247                                      Value *Mask) {
1248   unsigned NumElts = Vec->getType()->getVectorNumElements();
1249   if (Mask) {
1250     const auto *C = dyn_cast<Constant>(Mask);
1251     if (!C || !C->isAllOnesValue())
1252       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1253   }
1254 
1255   if (NumElts < 8) {
1256     uint32_t Indices[8];
1257     for (unsigned i = 0; i != NumElts; ++i)
1258       Indices[i] = i;
1259     for (unsigned i = NumElts; i != 8; ++i)
1260       Indices[i] = NumElts + i % NumElts;
1261     Vec = Builder.CreateShuffleVector(Vec,
1262                                       Constant::getNullValue(Vec->getType()),
1263                                       Indices);
1264   }
1265   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1266 }
1267 
1268 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1269                                    unsigned CC, bool Signed) {
1270   Value *Op0 = CI.getArgOperand(0);
1271   unsigned NumElts = Op0->getType()->getVectorNumElements();
1272 
1273   Value *Cmp;
1274   if (CC == 3) {
1275     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1276   } else if (CC == 7) {
1277     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1278   } else {
1279     ICmpInst::Predicate Pred;
1280     switch (CC) {
1281     default: llvm_unreachable("Unknown condition code");
1282     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1283     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1284     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1285     case 4: Pred = ICmpInst::ICMP_NE;  break;
1286     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1287     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1288     }
1289     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1290   }
1291 
1292   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1293 
1294   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1295 }
1296 
1297 // Replace a masked intrinsic with an older unmasked intrinsic.
1298 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1299                                     Intrinsic::ID IID) {
1300   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1301   Value *Rep = Builder.CreateCall(Intrin,
1302                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1303   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1304 }
1305 
1306 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1307   Value* A = CI.getArgOperand(0);
1308   Value* B = CI.getArgOperand(1);
1309   Value* Src = CI.getArgOperand(2);
1310   Value* Mask = CI.getArgOperand(3);
1311 
1312   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1313   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1314   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1315   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1316   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1317   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1318 }
1319 
1320 
1321 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1322   Value* Op = CI.getArgOperand(0);
1323   Type* ReturnOp = CI.getType();
1324   unsigned NumElts = CI.getType()->getVectorNumElements();
1325   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1326   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1327 }
1328 
1329 // Replace intrinsic with unmasked version and a select.
1330 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1331                                       CallInst &CI, Value *&Rep) {
1332   Name = Name.substr(12); // Remove avx512.mask.
1333 
1334   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1335   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1336   Intrinsic::ID IID;
1337   if (Name.startswith("max.p")) {
1338     if (VecWidth == 128 && EltWidth == 32)
1339       IID = Intrinsic::x86_sse_max_ps;
1340     else if (VecWidth == 128 && EltWidth == 64)
1341       IID = Intrinsic::x86_sse2_max_pd;
1342     else if (VecWidth == 256 && EltWidth == 32)
1343       IID = Intrinsic::x86_avx_max_ps_256;
1344     else if (VecWidth == 256 && EltWidth == 64)
1345       IID = Intrinsic::x86_avx_max_pd_256;
1346     else
1347       llvm_unreachable("Unexpected intrinsic");
1348   } else if (Name.startswith("min.p")) {
1349     if (VecWidth == 128 && EltWidth == 32)
1350       IID = Intrinsic::x86_sse_min_ps;
1351     else if (VecWidth == 128 && EltWidth == 64)
1352       IID = Intrinsic::x86_sse2_min_pd;
1353     else if (VecWidth == 256 && EltWidth == 32)
1354       IID = Intrinsic::x86_avx_min_ps_256;
1355     else if (VecWidth == 256 && EltWidth == 64)
1356       IID = Intrinsic::x86_avx_min_pd_256;
1357     else
1358       llvm_unreachable("Unexpected intrinsic");
1359   } else if (Name.startswith("pshuf.b.")) {
1360     if (VecWidth == 128)
1361       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1362     else if (VecWidth == 256)
1363       IID = Intrinsic::x86_avx2_pshuf_b;
1364     else if (VecWidth == 512)
1365       IID = Intrinsic::x86_avx512_pshuf_b_512;
1366     else
1367       llvm_unreachable("Unexpected intrinsic");
1368   } else if (Name.startswith("pmul.hr.sw.")) {
1369     if (VecWidth == 128)
1370       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1371     else if (VecWidth == 256)
1372       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1373     else if (VecWidth == 512)
1374       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1375     else
1376       llvm_unreachable("Unexpected intrinsic");
1377   } else if (Name.startswith("pmulh.w.")) {
1378     if (VecWidth == 128)
1379       IID = Intrinsic::x86_sse2_pmulh_w;
1380     else if (VecWidth == 256)
1381       IID = Intrinsic::x86_avx2_pmulh_w;
1382     else if (VecWidth == 512)
1383       IID = Intrinsic::x86_avx512_pmulh_w_512;
1384     else
1385       llvm_unreachable("Unexpected intrinsic");
1386   } else if (Name.startswith("pmulhu.w.")) {
1387     if (VecWidth == 128)
1388       IID = Intrinsic::x86_sse2_pmulhu_w;
1389     else if (VecWidth == 256)
1390       IID = Intrinsic::x86_avx2_pmulhu_w;
1391     else if (VecWidth == 512)
1392       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1393     else
1394       llvm_unreachable("Unexpected intrinsic");
1395   } else if (Name.startswith("pmaddw.d.")) {
1396     if (VecWidth == 128)
1397       IID = Intrinsic::x86_sse2_pmadd_wd;
1398     else if (VecWidth == 256)
1399       IID = Intrinsic::x86_avx2_pmadd_wd;
1400     else if (VecWidth == 512)
1401       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1402     else
1403       llvm_unreachable("Unexpected intrinsic");
1404   } else if (Name.startswith("pmaddubs.w.")) {
1405     if (VecWidth == 128)
1406       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1407     else if (VecWidth == 256)
1408       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1409     else if (VecWidth == 512)
1410       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1411     else
1412       llvm_unreachable("Unexpected intrinsic");
1413   } else if (Name.startswith("packsswb.")) {
1414     if (VecWidth == 128)
1415       IID = Intrinsic::x86_sse2_packsswb_128;
1416     else if (VecWidth == 256)
1417       IID = Intrinsic::x86_avx2_packsswb;
1418     else if (VecWidth == 512)
1419       IID = Intrinsic::x86_avx512_packsswb_512;
1420     else
1421       llvm_unreachable("Unexpected intrinsic");
1422   } else if (Name.startswith("packssdw.")) {
1423     if (VecWidth == 128)
1424       IID = Intrinsic::x86_sse2_packssdw_128;
1425     else if (VecWidth == 256)
1426       IID = Intrinsic::x86_avx2_packssdw;
1427     else if (VecWidth == 512)
1428       IID = Intrinsic::x86_avx512_packssdw_512;
1429     else
1430       llvm_unreachable("Unexpected intrinsic");
1431   } else if (Name.startswith("packuswb.")) {
1432     if (VecWidth == 128)
1433       IID = Intrinsic::x86_sse2_packuswb_128;
1434     else if (VecWidth == 256)
1435       IID = Intrinsic::x86_avx2_packuswb;
1436     else if (VecWidth == 512)
1437       IID = Intrinsic::x86_avx512_packuswb_512;
1438     else
1439       llvm_unreachable("Unexpected intrinsic");
1440   } else if (Name.startswith("packusdw.")) {
1441     if (VecWidth == 128)
1442       IID = Intrinsic::x86_sse41_packusdw;
1443     else if (VecWidth == 256)
1444       IID = Intrinsic::x86_avx2_packusdw;
1445     else if (VecWidth == 512)
1446       IID = Intrinsic::x86_avx512_packusdw_512;
1447     else
1448       llvm_unreachable("Unexpected intrinsic");
1449   } else if (Name.startswith("vpermilvar.")) {
1450     if (VecWidth == 128 && EltWidth == 32)
1451       IID = Intrinsic::x86_avx_vpermilvar_ps;
1452     else if (VecWidth == 128 && EltWidth == 64)
1453       IID = Intrinsic::x86_avx_vpermilvar_pd;
1454     else if (VecWidth == 256 && EltWidth == 32)
1455       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1456     else if (VecWidth == 256 && EltWidth == 64)
1457       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1458     else if (VecWidth == 512 && EltWidth == 32)
1459       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1460     else if (VecWidth == 512 && EltWidth == 64)
1461       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1462     else
1463       llvm_unreachable("Unexpected intrinsic");
1464   } else if (Name == "cvtpd2dq.256") {
1465     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1466   } else if (Name == "cvtpd2ps.256") {
1467     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1468   } else if (Name == "cvttpd2dq.256") {
1469     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1470   } else if (Name == "cvttps2dq.128") {
1471     IID = Intrinsic::x86_sse2_cvttps2dq;
1472   } else if (Name == "cvttps2dq.256") {
1473     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1474   } else if (Name.startswith("permvar.")) {
1475     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1476     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1477       IID = Intrinsic::x86_avx2_permps;
1478     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1479       IID = Intrinsic::x86_avx2_permd;
1480     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1481       IID = Intrinsic::x86_avx512_permvar_df_256;
1482     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1483       IID = Intrinsic::x86_avx512_permvar_di_256;
1484     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1485       IID = Intrinsic::x86_avx512_permvar_sf_512;
1486     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1487       IID = Intrinsic::x86_avx512_permvar_si_512;
1488     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1489       IID = Intrinsic::x86_avx512_permvar_df_512;
1490     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1491       IID = Intrinsic::x86_avx512_permvar_di_512;
1492     else if (VecWidth == 128 && EltWidth == 16)
1493       IID = Intrinsic::x86_avx512_permvar_hi_128;
1494     else if (VecWidth == 256 && EltWidth == 16)
1495       IID = Intrinsic::x86_avx512_permvar_hi_256;
1496     else if (VecWidth == 512 && EltWidth == 16)
1497       IID = Intrinsic::x86_avx512_permvar_hi_512;
1498     else if (VecWidth == 128 && EltWidth == 8)
1499       IID = Intrinsic::x86_avx512_permvar_qi_128;
1500     else if (VecWidth == 256 && EltWidth == 8)
1501       IID = Intrinsic::x86_avx512_permvar_qi_256;
1502     else if (VecWidth == 512 && EltWidth == 8)
1503       IID = Intrinsic::x86_avx512_permvar_qi_512;
1504     else
1505       llvm_unreachable("Unexpected intrinsic");
1506   } else if (Name.startswith("dbpsadbw.")) {
1507     if (VecWidth == 128)
1508       IID = Intrinsic::x86_avx512_dbpsadbw_128;
1509     else if (VecWidth == 256)
1510       IID = Intrinsic::x86_avx512_dbpsadbw_256;
1511     else if (VecWidth == 512)
1512       IID = Intrinsic::x86_avx512_dbpsadbw_512;
1513     else
1514       llvm_unreachable("Unexpected intrinsic");
1515   } else if (Name.startswith("pmultishift.qb.")) {
1516     if (VecWidth == 128)
1517       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1518     else if (VecWidth == 256)
1519       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1520     else if (VecWidth == 512)
1521       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1522     else
1523       llvm_unreachable("Unexpected intrinsic");
1524   } else if (Name.startswith("conflict.")) {
1525     if (Name[9] == 'd' && VecWidth == 128)
1526       IID = Intrinsic::x86_avx512_conflict_d_128;
1527     else if (Name[9] == 'd' && VecWidth == 256)
1528       IID = Intrinsic::x86_avx512_conflict_d_256;
1529     else if (Name[9] == 'd' && VecWidth == 512)
1530       IID = Intrinsic::x86_avx512_conflict_d_512;
1531     else if (Name[9] == 'q' && VecWidth == 128)
1532       IID = Intrinsic::x86_avx512_conflict_q_128;
1533     else if (Name[9] == 'q' && VecWidth == 256)
1534       IID = Intrinsic::x86_avx512_conflict_q_256;
1535     else if (Name[9] == 'q' && VecWidth == 512)
1536       IID = Intrinsic::x86_avx512_conflict_q_512;
1537     else
1538       llvm_unreachable("Unexpected intrinsic");
1539   } else
1540     return false;
1541 
1542   SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1543                                CI.arg_operands().end());
1544   Args.pop_back();
1545   Args.pop_back();
1546   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1547                            Args);
1548   unsigned NumArgs = CI.getNumArgOperands();
1549   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1550                       CI.getArgOperand(NumArgs - 2));
1551   return true;
1552 }
1553 
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// The string is modified in place, and only when all of the following hold:
/// it begins with "mov\tfp", it contains "objc_retainAutoreleaseReturnValue",
/// and it contains "# marker". In that case the '#' of the first "# marker"
/// occurrence is replaced by ';'. Otherwise the string is left untouched.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  std::string &Asm = *AsmStr;

  // rfind anchored at position 0 can only match at the very start.
  if (Asm.rfind("mov\tfp", 0) != 0)
    return;
  if (Asm.find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const std::string::size_type MarkerPos = Asm.find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  Asm[MarkerPos] = ';';
}
1565 
1566 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1567 /// provided to seamlessly integrate with existing context.
1568 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1569   Function *F = CI->getCalledFunction();
1570   LLVMContext &C = CI->getContext();
1571   IRBuilder<> Builder(C);
1572   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1573 
1574   assert(F && "Intrinsic call is not direct?");
1575 
1576   if (!NewFn) {
1577     // Get the Function's name.
1578     StringRef Name = F->getName();
1579 
1580     // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
1581     // from upgrader because the optimizer now only recognizes intrinsics for
1582     // ARC runtime calls.
1583     if (Name == "clang.arc.use") {
1584       CI->eraseFromParent();
1585       return;
1586     }
1587 
1588     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1589     Name = Name.substr(5);
1590 
1591     bool IsX86 = Name.startswith("x86.");
1592     if (IsX86)
1593       Name = Name.substr(4);
1594     bool IsNVVM = Name.startswith("nvvm.");
1595     if (IsNVVM)
1596       Name = Name.substr(5);
1597 
1598     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1599       Module *M = F->getParent();
1600       SmallVector<Metadata *, 1> Elts;
1601       Elts.push_back(
1602           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1603       MDNode *Node = MDNode::get(C, Elts);
1604 
1605       Value *Arg0 = CI->getArgOperand(0);
1606       Value *Arg1 = CI->getArgOperand(1);
1607 
1608       // Nontemporal (unaligned) store of the 0'th element of the float/double
1609       // vector.
1610       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1611       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1612       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1613       Value *Extract =
1614           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1615 
1616       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1617       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1618 
1619       // Remove intrinsic.
1620       CI->eraseFromParent();
1621       return;
1622     }
1623 
1624     if (IsX86 && (Name.startswith("avx.movnt.") ||
1625                   Name.startswith("avx512.storent."))) {
1626       Module *M = F->getParent();
1627       SmallVector<Metadata *, 1> Elts;
1628       Elts.push_back(
1629           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1630       MDNode *Node = MDNode::get(C, Elts);
1631 
1632       Value *Arg0 = CI->getArgOperand(0);
1633       Value *Arg1 = CI->getArgOperand(1);
1634 
1635       // Convert the type of the pointer to a pointer to the stored type.
1636       Value *BC = Builder.CreateBitCast(Arg0,
1637                                         PointerType::getUnqual(Arg1->getType()),
1638                                         "cast");
1639       VectorType *VTy = cast<VectorType>(Arg1->getType());
1640       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1641                                                  VTy->getBitWidth() / 8);
1642       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1643 
1644       // Remove intrinsic.
1645       CI->eraseFromParent();
1646       return;
1647     }
1648 
1649     if (IsX86 && Name == "sse2.storel.dq") {
1650       Value *Arg0 = CI->getArgOperand(0);
1651       Value *Arg1 = CI->getArgOperand(1);
1652 
1653       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1654       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1655       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1656       Value *BC = Builder.CreateBitCast(Arg0,
1657                                         PointerType::getUnqual(Elt->getType()),
1658                                         "cast");
1659       Builder.CreateAlignedStore(Elt, BC, 1);
1660 
1661       // Remove intrinsic.
1662       CI->eraseFromParent();
1663       return;
1664     }
1665 
1666     if (IsX86 && (Name.startswith("sse.storeu.") ||
1667                   Name.startswith("sse2.storeu.") ||
1668                   Name.startswith("avx.storeu."))) {
1669       Value *Arg0 = CI->getArgOperand(0);
1670       Value *Arg1 = CI->getArgOperand(1);
1671 
1672       Arg0 = Builder.CreateBitCast(Arg0,
1673                                    PointerType::getUnqual(Arg1->getType()),
1674                                    "cast");
1675       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1676 
1677       // Remove intrinsic.
1678       CI->eraseFromParent();
1679       return;
1680     }
1681 
1682     if (IsX86 && Name == "avx512.mask.store.ss") {
1683       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1684       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1685                          Mask, false);
1686 
1687       // Remove intrinsic.
1688       CI->eraseFromParent();
1689       return;
1690     }
1691 
1692     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1693       // "avx512.mask.storeu." or "avx512.mask.store."
1694       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1695       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1696                          CI->getArgOperand(2), Aligned);
1697 
1698       // Remove intrinsic.
1699       CI->eraseFromParent();
1700       return;
1701     }
1702 
1703     Value *Rep;
1704     // Upgrade packed integer vector compare intrinsics to compare instructions.
1705     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1706                   Name.startswith("avx2.pcmp"))) {
      // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1708       bool CmpEq = Name[9] == 'e';
1709       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1710                                CI->getArgOperand(0), CI->getArgOperand(1));
1711       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1712     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1713       Type *ExtTy = Type::getInt32Ty(C);
1714       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1715         ExtTy = Type::getInt64Ty(C);
1716       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1717                          ExtTy->getPrimitiveSizeInBits();
1718       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1719       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1720     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1721                          Name == "sse2.sqrt.sd")) {
1722       Value *Vec = CI->getArgOperand(0);
1723       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1724       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1725                                                  Intrinsic::sqrt, Elt0->getType());
1726       Elt0 = Builder.CreateCall(Intr, Elt0);
1727       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1728     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1729                          Name.startswith("sse2.sqrt.p") ||
1730                          Name.startswith("sse.sqrt.p"))) {
1731       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1732                                                          Intrinsic::sqrt,
1733                                                          CI->getType()),
1734                                {CI->getArgOperand(0)});
1735     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1736       if (CI->getNumArgOperands() == 4 &&
1737           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1738            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1739         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1740                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1741 
1742         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1743         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1744                                                            IID), Args);
1745       } else {
1746         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1747                                                            Intrinsic::sqrt,
1748                                                            CI->getType()),
1749                                  {CI->getArgOperand(0)});
1750       }
1751       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1752                           CI->getArgOperand(1));
1753     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1754                          Name.startswith("avx512.ptestnm"))) {
1755       Value *Op0 = CI->getArgOperand(0);
1756       Value *Op1 = CI->getArgOperand(1);
1757       Value *Mask = CI->getArgOperand(2);
1758       Rep = Builder.CreateAnd(Op0, Op1);
1759       llvm::Type *Ty = Op0->getType();
1760       Value *Zero = llvm::Constant::getNullValue(Ty);
1761       ICmpInst::Predicate Pred =
1762         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1763       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1764       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1765     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1766       unsigned NumElts =
1767           CI->getArgOperand(1)->getType()->getVectorNumElements();
1768       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1769       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1770                           CI->getArgOperand(1));
1771     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1772       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1773       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1774       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1775       uint32_t Indices[64];
1776       for (unsigned i = 0; i != NumElts; ++i)
1777         Indices[i] = i;
1778 
1779       // First extract half of each vector. This gives better codegen than
1780       // doing it in a single shuffle.
1781       LHS = Builder.CreateShuffleVector(LHS, LHS,
1782                                         makeArrayRef(Indices, NumElts / 2));
1783       RHS = Builder.CreateShuffleVector(RHS, RHS,
1784                                         makeArrayRef(Indices, NumElts / 2));
1785       // Concat the vectors.
1786       // NOTE: Operands have to be swapped to match intrinsic definition.
1787       Rep = Builder.CreateShuffleVector(RHS, LHS,
1788                                         makeArrayRef(Indices, NumElts));
1789       Rep = Builder.CreateBitCast(Rep, CI->getType());
1790     } else if (IsX86 && Name == "avx512.kand.w") {
1791       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1792       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1793       Rep = Builder.CreateAnd(LHS, RHS);
1794       Rep = Builder.CreateBitCast(Rep, CI->getType());
1795     } else if (IsX86 && Name == "avx512.kandn.w") {
1796       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1797       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1798       LHS = Builder.CreateNot(LHS);
1799       Rep = Builder.CreateAnd(LHS, RHS);
1800       Rep = Builder.CreateBitCast(Rep, CI->getType());
1801     } else if (IsX86 && Name == "avx512.kor.w") {
1802       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1803       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1804       Rep = Builder.CreateOr(LHS, RHS);
1805       Rep = Builder.CreateBitCast(Rep, CI->getType());
1806     } else if (IsX86 && Name == "avx512.kxor.w") {
1807       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1808       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1809       Rep = Builder.CreateXor(LHS, RHS);
1810       Rep = Builder.CreateBitCast(Rep, CI->getType());
1811     } else if (IsX86 && Name == "avx512.kxnor.w") {
1812       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1813       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1814       LHS = Builder.CreateNot(LHS);
1815       Rep = Builder.CreateXor(LHS, RHS);
1816       Rep = Builder.CreateBitCast(Rep, CI->getType());
1817     } else if (IsX86 && Name == "avx512.knot.w") {
1818       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1819       Rep = Builder.CreateNot(Rep);
1820       Rep = Builder.CreateBitCast(Rep, CI->getType());
1821     } else if (IsX86 &&
1822                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1823       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1824       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1825       Rep = Builder.CreateOr(LHS, RHS);
1826       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1827       Value *C;
1828       if (Name[14] == 'c')
1829         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1830       else
1831         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1832       Rep = Builder.CreateICmpEQ(Rep, C);
1833       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1834     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1835                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1836                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1837                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1838       Type *I32Ty = Type::getInt32Ty(C);
1839       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1840                                                  ConstantInt::get(I32Ty, 0));
1841       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1842                                                  ConstantInt::get(I32Ty, 0));
1843       Value *EltOp;
1844       if (Name.contains(".add."))
1845         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1846       else if (Name.contains(".sub."))
1847         EltOp = Builder.CreateFSub(Elt0, Elt1);
1848       else if (Name.contains(".mul."))
1849         EltOp = Builder.CreateFMul(Elt0, Elt1);
1850       else
1851         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1852       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1853                                         ConstantInt::get(I32Ty, 0));
1854     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1855       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1856       bool CmpEq = Name[16] == 'e';
1857       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1858     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1859       Type *OpTy = CI->getArgOperand(0)->getType();
1860       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1861       Intrinsic::ID IID;
1862       switch (VecWidth) {
1863       default: llvm_unreachable("Unexpected intrinsic");
1864       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1865       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1866       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1867       }
1868 
1869       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1870                                { CI->getOperand(0), CI->getArgOperand(1) });
1871       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1872     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1873       Type *OpTy = CI->getArgOperand(0)->getType();
1874       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1875       unsigned EltWidth = OpTy->getScalarSizeInBits();
1876       Intrinsic::ID IID;
1877       if (VecWidth == 128 && EltWidth == 32)
1878         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1879       else if (VecWidth == 256 && EltWidth == 32)
1880         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1881       else if (VecWidth == 512 && EltWidth == 32)
1882         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1883       else if (VecWidth == 128 && EltWidth == 64)
1884         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1885       else if (VecWidth == 256 && EltWidth == 64)
1886         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1887       else if (VecWidth == 512 && EltWidth == 64)
1888         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1889       else
1890         llvm_unreachable("Unexpected intrinsic");
1891 
1892       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1893                                { CI->getOperand(0), CI->getArgOperand(1) });
1894       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1895     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1896       Type *OpTy = CI->getArgOperand(0)->getType();
1897       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1898       unsigned EltWidth = OpTy->getScalarSizeInBits();
1899       Intrinsic::ID IID;
1900       if (VecWidth == 128 && EltWidth == 32)
1901         IID = Intrinsic::x86_avx512_cmp_ps_128;
1902       else if (VecWidth == 256 && EltWidth == 32)
1903         IID = Intrinsic::x86_avx512_cmp_ps_256;
1904       else if (VecWidth == 512 && EltWidth == 32)
1905         IID = Intrinsic::x86_avx512_cmp_ps_512;
1906       else if (VecWidth == 128 && EltWidth == 64)
1907         IID = Intrinsic::x86_avx512_cmp_pd_128;
1908       else if (VecWidth == 256 && EltWidth == 64)
1909         IID = Intrinsic::x86_avx512_cmp_pd_256;
1910       else if (VecWidth == 512 && EltWidth == 64)
1911         IID = Intrinsic::x86_avx512_cmp_pd_512;
1912       else
1913         llvm_unreachable("Unexpected intrinsic");
1914 
1915       SmallVector<Value *, 4> Args;
1916       Args.push_back(CI->getArgOperand(0));
1917       Args.push_back(CI->getArgOperand(1));
1918       Args.push_back(CI->getArgOperand(2));
1919       if (CI->getNumArgOperands() == 5)
1920         Args.push_back(CI->getArgOperand(4));
1921 
1922       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1923                                Args);
1924       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1925     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1926                Name[16] != 'p') {
1927       // Integer compare intrinsics.
1928       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1929       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1930     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1931       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1932       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1933     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1934                          Name.startswith("avx512.cvtw2mask.") ||
1935                          Name.startswith("avx512.cvtd2mask.") ||
1936                          Name.startswith("avx512.cvtq2mask."))) {
1937       Value *Op = CI->getArgOperand(0);
1938       Value *Zero = llvm::Constant::getNullValue(Op->getType());
1939       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1940       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1941     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1942                         Name == "ssse3.pabs.w.128" ||
1943                         Name == "ssse3.pabs.d.128" ||
1944                         Name.startswith("avx2.pabs") ||
1945                         Name.startswith("avx512.mask.pabs"))) {
1946       Rep = upgradeAbs(Builder, *CI);
1947     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1948                          Name == "sse2.pmaxs.w" ||
1949                          Name == "sse41.pmaxsd" ||
1950                          Name.startswith("avx2.pmaxs") ||
1951                          Name.startswith("avx512.mask.pmaxs"))) {
1952       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1953     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1954                          Name == "sse41.pmaxuw" ||
1955                          Name == "sse41.pmaxud" ||
1956                          Name.startswith("avx2.pmaxu") ||
1957                          Name.startswith("avx512.mask.pmaxu"))) {
1958       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1959     } else if (IsX86 && (Name == "sse41.pminsb" ||
1960                          Name == "sse2.pmins.w" ||
1961                          Name == "sse41.pminsd" ||
1962                          Name.startswith("avx2.pmins") ||
1963                          Name.startswith("avx512.mask.pmins"))) {
1964       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1965     } else if (IsX86 && (Name == "sse2.pminu.b" ||
1966                          Name == "sse41.pminuw" ||
1967                          Name == "sse41.pminud" ||
1968                          Name.startswith("avx2.pminu") ||
1969                          Name.startswith("avx512.mask.pminu"))) {
1970       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1971     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1972                          Name == "avx2.pmulu.dq" ||
1973                          Name == "avx512.pmulu.dq.512" ||
1974                          Name.startswith("avx512.mask.pmulu.dq."))) {
1975       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1976     } else if (IsX86 && (Name == "sse41.pmuldq" ||
1977                          Name == "avx2.pmul.dq" ||
1978                          Name == "avx512.pmul.dq.512" ||
1979                          Name.startswith("avx512.mask.pmul.dq."))) {
1980       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1981     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1982                          Name == "sse2.cvtsi2sd" ||
1983                          Name == "sse.cvtsi642ss" ||
1984                          Name == "sse2.cvtsi642sd")) {
1985       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1986                                  CI->getType()->getVectorElementType());
1987       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1988     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1989       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1990                                  CI->getType()->getVectorElementType());
1991       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1992     } else if (IsX86 && Name == "sse2.cvtss2sd") {
1993       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1994       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1995       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1996     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1997                          Name == "sse2.cvtdq2ps" ||
1998                          Name == "avx.cvtdq2.pd.256" ||
1999                          Name == "avx.cvtdq2.ps.256" ||
2000                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2001                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2002                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2003                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2004                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2005                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2006                          Name == "avx512.mask.cvtqq2ps.256" ||
2007                          Name == "avx512.mask.cvtqq2ps.512" ||
2008                          Name == "avx512.mask.cvtuqq2ps.256" ||
2009                          Name == "avx512.mask.cvtuqq2ps.512" ||
2010                          Name == "sse2.cvtps2pd" ||
2011                          Name == "avx.cvt.ps2.pd.256" ||
2012                          Name == "avx512.mask.cvtps2pd.128" ||
2013                          Name == "avx512.mask.cvtps2pd.256")) {
2014       Type *DstTy = CI->getType();
2015       Rep = CI->getArgOperand(0);
2016       Type *SrcTy = Rep->getType();
2017 
2018       unsigned NumDstElts = DstTy->getVectorNumElements();
2019       if (NumDstElts < SrcTy->getVectorNumElements()) {
2020         assert(NumDstElts == 2 && "Unexpected vector size");
2021         uint32_t ShuffleMask[2] = { 0, 1 };
2022         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2023       }
2024 
2025       bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2026       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2027       if (IsPS2PD)
2028         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2029       else if (CI->getNumArgOperands() == 4 &&
2030                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2031                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2032         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2033                                        : Intrinsic::x86_avx512_sitofp_round;
2034         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2035                                                 { DstTy, SrcTy });
2036         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2037       } else {
2038         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2039                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2040       }
2041 
2042       if (CI->getNumArgOperands() >= 3)
2043         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2044                             CI->getArgOperand(1));
2045     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2046       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2047                               CI->getArgOperand(1), CI->getArgOperand(2),
2048                               /*Aligned*/false);
2049     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2050       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2051                               CI->getArgOperand(1),CI->getArgOperand(2),
2052                               /*Aligned*/true);
2053     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2054       Type *ResultTy = CI->getType();
2055       Type *PtrTy = ResultTy->getVectorElementType();
2056 
2057       // Cast the pointer to element type.
2058       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2059                                          llvm::PointerType::getUnqual(PtrTy));
2060 
2061       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2062                                      ResultTy->getVectorNumElements());
2063 
2064       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2065                                                 Intrinsic::masked_expandload,
2066                                                 ResultTy);
2067       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2068     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2069       Type *ResultTy = CI->getArgOperand(1)->getType();
2070       Type *PtrTy = ResultTy->getVectorElementType();
2071 
2072       // Cast the pointer to element type.
2073       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2074                                          llvm::PointerType::getUnqual(PtrTy));
2075 
2076       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2077                                      ResultTy->getVectorNumElements());
2078 
2079       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2080                                                 Intrinsic::masked_compressstore,
2081                                                 ResultTy);
2082       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2083     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2084                          Name.startswith("avx512.mask.expand."))) {
2085       Type *ResultTy = CI->getType();
2086 
2087       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2088                                      ResultTy->getVectorNumElements());
2089 
2090       bool IsCompress = Name[12] == 'c';
2091       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2092                                      : Intrinsic::x86_avx512_mask_expand;
2093       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2094       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2095                                        MaskVec });
2096     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2097       bool IsSigned;
2098       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2099           Name.endswith("uq"))
2100         IsSigned = false;
2101       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2102                Name.endswith("q"))
2103         IsSigned = true;
2104       else
2105         llvm_unreachable("Unknown suffix");
2106 
2107       unsigned Imm;
2108       if (CI->getNumArgOperands() == 3) {
2109         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2110       } else {
2111         Name = Name.substr(9); // strip off "xop.vpcom"
2112         if (Name.startswith("lt"))
2113           Imm = 0;
2114         else if (Name.startswith("le"))
2115           Imm = 1;
2116         else if (Name.startswith("gt"))
2117           Imm = 2;
2118         else if (Name.startswith("ge"))
2119           Imm = 3;
2120         else if (Name.startswith("eq"))
2121           Imm = 4;
2122         else if (Name.startswith("ne"))
2123           Imm = 5;
2124         else if (Name.startswith("false"))
2125           Imm = 6;
2126         else if (Name.startswith("true"))
2127           Imm = 7;
2128         else
2129           llvm_unreachable("Unknown condition");
2130       }
2131 
2132       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2133     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2134       Value *Sel = CI->getArgOperand(2);
2135       Value *NotSel = Builder.CreateNot(Sel);
2136       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2137       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2138       Rep = Builder.CreateOr(Sel0, Sel1);
2139     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2140                          Name.startswith("avx512.prol") ||
2141                          Name.startswith("avx512.mask.prol"))) {
2142       Rep = upgradeX86Rotate(Builder, *CI, false);
2143     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2144                          Name.startswith("avx512.mask.pror"))) {
2145       Rep = upgradeX86Rotate(Builder, *CI, true);
2146     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2147                          Name.startswith("avx512.mask.vpshld") ||
2148                          Name.startswith("avx512.maskz.vpshld"))) {
2149       bool ZeroMask = Name[11] == 'z';
2150       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2151     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2152                          Name.startswith("avx512.mask.vpshrd") ||
2153                          Name.startswith("avx512.maskz.vpshrd"))) {
2154       bool ZeroMask = Name[11] == 'z';
2155       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2156     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2157       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2158                                                Intrinsic::x86_sse42_crc32_32_8);
2159       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2160       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2161       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2162     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2163                          Name.startswith("avx512.vbroadcast.s"))) {
2164       // Replace broadcasts with a series of insertelements.
2165       Type *VecTy = CI->getType();
2166       Type *EltTy = VecTy->getVectorElementType();
2167       unsigned EltNum = VecTy->getVectorNumElements();
2168       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2169                                           EltTy->getPointerTo());
2170       Value *Load = Builder.CreateLoad(EltTy, Cast);
2171       Type *I32Ty = Type::getInt32Ty(C);
2172       Rep = UndefValue::get(VecTy);
2173       for (unsigned I = 0; I < EltNum; ++I)
2174         Rep = Builder.CreateInsertElement(Rep, Load,
2175                                           ConstantInt::get(I32Ty, I));
2176     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2177                          Name.startswith("sse41.pmovzx") ||
2178                          Name.startswith("avx2.pmovsx") ||
2179                          Name.startswith("avx2.pmovzx") ||
2180                          Name.startswith("avx512.mask.pmovsx") ||
2181                          Name.startswith("avx512.mask.pmovzx"))) {
2182       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2183       VectorType *DstTy = cast<VectorType>(CI->getType());
2184       unsigned NumDstElts = DstTy->getNumElements();
2185 
2186       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2187       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2188       for (unsigned i = 0; i != NumDstElts; ++i)
2189         ShuffleMask[i] = i;
2190 
2191       Value *SV = Builder.CreateShuffleVector(
2192           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2193 
2194       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2195       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2196                    : Builder.CreateZExt(SV, DstTy);
2197       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2198       if (CI->getNumArgOperands() == 3)
2199         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2200                             CI->getArgOperand(1));
2201     } else if (Name == "avx512.mask.pmov.qd.256" ||
2202                Name == "avx512.mask.pmov.qd.512" ||
2203                Name == "avx512.mask.pmov.wb.256" ||
2204                Name == "avx512.mask.pmov.wb.512") {
2205       Type *Ty = CI->getArgOperand(1)->getType();
2206       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2207       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2208                           CI->getArgOperand(1));
2209     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2210                          Name == "avx2.vbroadcasti128")) {
2211       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2212       Type *EltTy = CI->getType()->getVectorElementType();
2213       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2214       Type *VT = VectorType::get(EltTy, NumSrcElts);
2215       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2216                                             PointerType::getUnqual(VT));
2217       Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
2218       if (NumSrcElts == 2)
2219         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2220                                           { 0, 1, 0, 1 });
2221       else
2222         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2223                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2224     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2225                          Name.startswith("avx512.mask.shuf.f"))) {
2226       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2227       Type *VT = CI->getType();
2228       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2229       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2230       unsigned ControlBitsMask = NumLanes - 1;
2231       unsigned NumControlBits = NumLanes / 2;
2232       SmallVector<uint32_t, 8> ShuffleMask(0);
2233 
2234       for (unsigned l = 0; l != NumLanes; ++l) {
2235         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2236         // We actually need the other source.
2237         if (l >= NumLanes / 2)
2238           LaneMask += NumLanes;
2239         for (unsigned i = 0; i != NumElementsInLane; ++i)
2240           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2241       }
2242       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2243                                         CI->getArgOperand(1), ShuffleMask);
2244       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2245                           CI->getArgOperand(3));
2246     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2247                          Name.startswith("avx512.mask.broadcasti"))) {
2248       unsigned NumSrcElts =
2249                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2250       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2251 
2252       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2253       for (unsigned i = 0; i != NumDstElts; ++i)
2254         ShuffleMask[i] = i % NumSrcElts;
2255 
2256       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2257                                         CI->getArgOperand(0),
2258                                         ShuffleMask);
2259       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2260                           CI->getArgOperand(1));
2261     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2262                          Name.startswith("avx2.vbroadcast") ||
2263                          Name.startswith("avx512.pbroadcast") ||
2264                          Name.startswith("avx512.mask.broadcast.s"))) {
2265       // Replace vp?broadcasts with a vector shuffle.
2266       Value *Op = CI->getArgOperand(0);
2267       unsigned NumElts = CI->getType()->getVectorNumElements();
2268       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2269       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2270                                         Constant::getNullValue(MaskTy));
2271 
2272       if (CI->getNumArgOperands() == 3)
2273         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2274                             CI->getArgOperand(1));
2275     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2276                          Name.startswith("sse2.psubs.") ||
2277                          Name.startswith("avx2.padds.") ||
2278                          Name.startswith("avx2.psubs.") ||
2279                          Name.startswith("avx512.padds.") ||
2280                          Name.startswith("avx512.psubs.") ||
2281                          Name.startswith("avx512.mask.padds.") ||
2282                          Name.startswith("avx512.mask.psubs."))) {
2283       bool IsAdd = Name.contains(".padds");
2284       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2285     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2286                          Name.startswith("sse2.psubus.") ||
2287                          Name.startswith("avx2.paddus.") ||
2288                          Name.startswith("avx2.psubus.") ||
2289                          Name.startswith("avx512.mask.paddus.") ||
2290                          Name.startswith("avx512.mask.psubus."))) {
2291       bool IsAdd = Name.contains(".paddus");
2292       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2293     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2294       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2295                                       CI->getArgOperand(1),
2296                                       CI->getArgOperand(2),
2297                                       CI->getArgOperand(3),
2298                                       CI->getArgOperand(4),
2299                                       false);
2300     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2301       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2302                                       CI->getArgOperand(1),
2303                                       CI->getArgOperand(2),
2304                                       CI->getArgOperand(3),
2305                                       CI->getArgOperand(4),
2306                                       true);
2307     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2308                          Name == "avx2.psll.dq")) {
2309       // 128/256-bit shift left specified in bits.
2310       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2311       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2312                                        Shift / 8); // Shift is in bits.
2313     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2314                          Name == "avx2.psrl.dq")) {
2315       // 128/256-bit shift right specified in bits.
2316       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2317       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2318                                        Shift / 8); // Shift is in bits.
2319     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2320                          Name == "avx2.psll.dq.bs" ||
2321                          Name == "avx512.psll.dq.512")) {
2322       // 128/256/512-bit shift left specified in bytes.
2323       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2324       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2325     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2326                          Name == "avx2.psrl.dq.bs" ||
2327                          Name == "avx512.psrl.dq.512")) {
2328       // 128/256/512-bit shift right specified in bytes.
2329       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2330       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2331     } else if (IsX86 && (Name == "sse41.pblendw" ||
2332                          Name.startswith("sse41.blendp") ||
2333                          Name.startswith("avx.blend.p") ||
2334                          Name == "avx2.pblendw" ||
2335                          Name.startswith("avx2.pblendd."))) {
2336       Value *Op0 = CI->getArgOperand(0);
2337       Value *Op1 = CI->getArgOperand(1);
2338       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2339       VectorType *VecTy = cast<VectorType>(CI->getType());
2340       unsigned NumElts = VecTy->getNumElements();
2341 
2342       SmallVector<uint32_t, 16> Idxs(NumElts);
2343       for (unsigned i = 0; i != NumElts; ++i)
2344         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2345 
2346       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2347     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2348                          Name == "avx2.vinserti128" ||
2349                          Name.startswith("avx512.mask.insert"))) {
2350       Value *Op0 = CI->getArgOperand(0);
2351       Value *Op1 = CI->getArgOperand(1);
2352       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2353       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2354       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2355       unsigned Scale = DstNumElts / SrcNumElts;
2356 
2357       // Mask off the high bits of the immediate value; hardware ignores those.
2358       Imm = Imm % Scale;
2359 
2360       // Extend the second operand into a vector the size of the destination.
2361       Value *UndefV = UndefValue::get(Op1->getType());
2362       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2363       for (unsigned i = 0; i != SrcNumElts; ++i)
2364         Idxs[i] = i;
2365       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2366         Idxs[i] = SrcNumElts;
2367       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2368 
2369       // Insert the second operand into the first operand.
2370 
2371       // Note that there is no guarantee that instruction lowering will actually
2372       // produce a vinsertf128 instruction for the created shuffles. In
2373       // particular, the 0 immediate case involves no lane changes, so it can
2374       // be handled as a blend.
2375 
2376       // Example of shuffle mask for 32-bit elements:
2377       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2378       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2379 
      // First fill with identity mask.
2381       for (unsigned i = 0; i != DstNumElts; ++i)
2382         Idxs[i] = i;
2383       // Then replace the elements where we need to insert.
2384       for (unsigned i = 0; i != SrcNumElts; ++i)
2385         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2386       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2387 
2388       // If the intrinsic has a mask operand, handle that.
2389       if (CI->getNumArgOperands() == 5)
2390         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2391                             CI->getArgOperand(3));
2392     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2393                          Name == "avx2.vextracti128" ||
2394                          Name.startswith("avx512.mask.vextract"))) {
2395       Value *Op0 = CI->getArgOperand(0);
2396       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2397       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2398       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2399       unsigned Scale = SrcNumElts / DstNumElts;
2400 
2401       // Mask off the high bits of the immediate value; hardware ignores those.
2402       Imm = Imm % Scale;
2403 
2404       // Get indexes for the subvector of the input vector.
2405       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2406       for (unsigned i = 0; i != DstNumElts; ++i) {
2407         Idxs[i] = i + (Imm * DstNumElts);
2408       }
2409       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2410 
2411       // If the intrinsic has a mask operand, handle that.
2412       if (CI->getNumArgOperands() == 4)
2413         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2414                             CI->getArgOperand(2));
2415     } else if (!IsX86 && Name == "stackprotectorcheck") {
2416       Rep = nullptr;
2417     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2418                          Name.startswith("avx512.mask.perm.di."))) {
2419       Value *Op0 = CI->getArgOperand(0);
2420       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2421       VectorType *VecTy = cast<VectorType>(CI->getType());
2422       unsigned NumElts = VecTy->getNumElements();
2423 
2424       SmallVector<uint32_t, 8> Idxs(NumElts);
2425       for (unsigned i = 0; i != NumElts; ++i)
2426         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2427 
2428       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2429 
2430       if (CI->getNumArgOperands() == 4)
2431         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2432                             CI->getArgOperand(2));
2433     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2434                          Name == "avx2.vperm2i128")) {
2435       // The immediate permute control byte looks like this:
2436       //    [1:0] - select 128 bits from sources for low half of destination
2437       //    [2]   - ignore
2438       //    [3]   - zero low half of destination
2439       //    [5:4] - select 128 bits from sources for high half of destination
2440       //    [6]   - ignore
2441       //    [7]   - zero high half of destination
2442 
2443       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2444 
2445       unsigned NumElts = CI->getType()->getVectorNumElements();
2446       unsigned HalfSize = NumElts / 2;
2447       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2448 
2449       // Determine which operand(s) are actually in use for this instruction.
2450       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2451       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2452 
2453       // If needed, replace operands based on zero mask.
2454       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2455       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2456 
2457       // Permute low half of result.
2458       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2459       for (unsigned i = 0; i < HalfSize; ++i)
2460         ShuffleMask[i] = StartIndex + i;
2461 
2462       // Permute high half of result.
2463       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2464       for (unsigned i = 0; i < HalfSize; ++i)
2465         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2466 
2467       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2468 
2469     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2470                          Name == "sse2.pshuf.d" ||
2471                          Name.startswith("avx512.mask.vpermil.p") ||
2472                          Name.startswith("avx512.mask.pshuf.d."))) {
2473       Value *Op0 = CI->getArgOperand(0);
2474       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2475       VectorType *VecTy = cast<VectorType>(CI->getType());
2476       unsigned NumElts = VecTy->getNumElements();
2477       // Calculate the size of each index in the immediate.
2478       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2479       unsigned IdxMask = ((1 << IdxSize) - 1);
2480 
2481       SmallVector<uint32_t, 8> Idxs(NumElts);
2482       // Lookup the bits for this element, wrapping around the immediate every
2483       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2484       // to offset by the first index of each group.
2485       for (unsigned i = 0; i != NumElts; ++i)
2486         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2487 
2488       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2489 
2490       if (CI->getNumArgOperands() == 4)
2491         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2492                             CI->getArgOperand(2));
2493     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2494                          Name.startswith("avx512.mask.pshufl.w."))) {
2495       Value *Op0 = CI->getArgOperand(0);
2496       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2497       unsigned NumElts = CI->getType()->getVectorNumElements();
2498 
2499       SmallVector<uint32_t, 16> Idxs(NumElts);
2500       for (unsigned l = 0; l != NumElts; l += 8) {
2501         for (unsigned i = 0; i != 4; ++i)
2502           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2503         for (unsigned i = 4; i != 8; ++i)
2504           Idxs[i + l] = i + l;
2505       }
2506 
2507       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2508 
2509       if (CI->getNumArgOperands() == 4)
2510         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2511                             CI->getArgOperand(2));
2512     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2513                          Name.startswith("avx512.mask.pshufh.w."))) {
2514       Value *Op0 = CI->getArgOperand(0);
2515       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2516       unsigned NumElts = CI->getType()->getVectorNumElements();
2517 
2518       SmallVector<uint32_t, 16> Idxs(NumElts);
2519       for (unsigned l = 0; l != NumElts; l += 8) {
2520         for (unsigned i = 0; i != 4; ++i)
2521           Idxs[i + l] = i + l;
2522         for (unsigned i = 0; i != 4; ++i)
2523           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2524       }
2525 
2526       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2527 
2528       if (CI->getNumArgOperands() == 4)
2529         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2530                             CI->getArgOperand(2));
2531     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2532       Value *Op0 = CI->getArgOperand(0);
2533       Value *Op1 = CI->getArgOperand(1);
2534       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2535       unsigned NumElts = CI->getType()->getVectorNumElements();
2536 
2537       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2538       unsigned HalfLaneElts = NumLaneElts / 2;
2539 
2540       SmallVector<uint32_t, 16> Idxs(NumElts);
2541       for (unsigned i = 0; i != NumElts; ++i) {
2542         // Base index is the starting element of the lane.
2543         Idxs[i] = i - (i % NumLaneElts);
2544         // If we are half way through the lane switch to the other source.
2545         if ((i % NumLaneElts) >= HalfLaneElts)
2546           Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8 bits.
2549         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2550       }
2551 
2552       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2553 
2554       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2555                           CI->getArgOperand(3));
2556     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2557                          Name.startswith("avx512.mask.movshdup") ||
2558                          Name.startswith("avx512.mask.movsldup"))) {
2559       Value *Op0 = CI->getArgOperand(0);
2560       unsigned NumElts = CI->getType()->getVectorNumElements();
2561       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2562 
2563       unsigned Offset = 0;
2564       if (Name.startswith("avx512.mask.movshdup."))
2565         Offset = 1;
2566 
2567       SmallVector<uint32_t, 16> Idxs(NumElts);
2568       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2569         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2570           Idxs[i + l + 0] = i + l + Offset;
2571           Idxs[i + l + 1] = i + l + Offset;
2572         }
2573 
2574       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2575 
2576       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2577                           CI->getArgOperand(1));
2578     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2579                          Name.startswith("avx512.mask.unpckl."))) {
2580       Value *Op0 = CI->getArgOperand(0);
2581       Value *Op1 = CI->getArgOperand(1);
2582       int NumElts = CI->getType()->getVectorNumElements();
2583       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2584 
2585       SmallVector<uint32_t, 64> Idxs(NumElts);
2586       for (int l = 0; l != NumElts; l += NumLaneElts)
2587         for (int i = 0; i != NumLaneElts; ++i)
2588           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2589 
2590       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2591 
2592       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2593                           CI->getArgOperand(2));
2594     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2595                          Name.startswith("avx512.mask.unpckh."))) {
2596       Value *Op0 = CI->getArgOperand(0);
2597       Value *Op1 = CI->getArgOperand(1);
2598       int NumElts = CI->getType()->getVectorNumElements();
2599       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2600 
2601       SmallVector<uint32_t, 64> Idxs(NumElts);
2602       for (int l = 0; l != NumElts; l += NumLaneElts)
2603         for (int i = 0; i != NumLaneElts; ++i)
2604           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2605 
2606       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2607 
2608       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2609                           CI->getArgOperand(2));
2610     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2611                          Name.startswith("avx512.mask.pand."))) {
2612       VectorType *FTy = cast<VectorType>(CI->getType());
2613       VectorType *ITy = VectorType::getInteger(FTy);
2614       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2615                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2616       Rep = Builder.CreateBitCast(Rep, FTy);
2617       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2618                           CI->getArgOperand(2));
2619     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2620                          Name.startswith("avx512.mask.pandn."))) {
2621       VectorType *FTy = cast<VectorType>(CI->getType());
2622       VectorType *ITy = VectorType::getInteger(FTy);
2623       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2624       Rep = Builder.CreateAnd(Rep,
2625                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2626       Rep = Builder.CreateBitCast(Rep, FTy);
2627       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2628                           CI->getArgOperand(2));
2629     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2630                          Name.startswith("avx512.mask.por."))) {
2631       VectorType *FTy = cast<VectorType>(CI->getType());
2632       VectorType *ITy = VectorType::getInteger(FTy);
2633       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2634                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2635       Rep = Builder.CreateBitCast(Rep, FTy);
2636       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2637                           CI->getArgOperand(2));
2638     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2639                          Name.startswith("avx512.mask.pxor."))) {
2640       VectorType *FTy = cast<VectorType>(CI->getType());
2641       VectorType *ITy = VectorType::getInteger(FTy);
2642       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2643                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2644       Rep = Builder.CreateBitCast(Rep, FTy);
2645       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2646                           CI->getArgOperand(2));
2647     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2648       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2649       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2650                           CI->getArgOperand(2));
2651     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2652       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2653       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2654                           CI->getArgOperand(2));
2655     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2656       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2657       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2658                           CI->getArgOperand(2));
2659     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2660       if (Name.endswith(".512")) {
2661         Intrinsic::ID IID;
2662         if (Name[17] == 's')
2663           IID = Intrinsic::x86_avx512_add_ps_512;
2664         else
2665           IID = Intrinsic::x86_avx512_add_pd_512;
2666 
2667         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2668                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2669                                    CI->getArgOperand(4) });
2670       } else {
2671         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2672       }
2673       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2674                           CI->getArgOperand(2));
2675     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2676       if (Name.endswith(".512")) {
2677         Intrinsic::ID IID;
2678         if (Name[17] == 's')
2679           IID = Intrinsic::x86_avx512_div_ps_512;
2680         else
2681           IID = Intrinsic::x86_avx512_div_pd_512;
2682 
2683         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2684                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2685                                    CI->getArgOperand(4) });
2686       } else {
2687         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2688       }
2689       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2690                           CI->getArgOperand(2));
2691     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2692       if (Name.endswith(".512")) {
2693         Intrinsic::ID IID;
2694         if (Name[17] == 's')
2695           IID = Intrinsic::x86_avx512_mul_ps_512;
2696         else
2697           IID = Intrinsic::x86_avx512_mul_pd_512;
2698 
2699         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2700                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2701                                    CI->getArgOperand(4) });
2702       } else {
2703         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2704       }
2705       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2706                           CI->getArgOperand(2));
2707     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2708       if (Name.endswith(".512")) {
2709         Intrinsic::ID IID;
2710         if (Name[17] == 's')
2711           IID = Intrinsic::x86_avx512_sub_ps_512;
2712         else
2713           IID = Intrinsic::x86_avx512_sub_pd_512;
2714 
2715         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2716                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2717                                    CI->getArgOperand(4) });
2718       } else {
2719         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2720       }
2721       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2722                           CI->getArgOperand(2));
2723     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2724                          Name.startswith("avx512.mask.min.p")) &&
2725                Name.drop_front(18) == ".512") {
2726       bool IsDouble = Name[17] == 'd';
2727       bool IsMin = Name[13] == 'i';
2728       static const Intrinsic::ID MinMaxTbl[2][2] = {
2729         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2730         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2731       };
2732       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2733 
2734       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2735                                { CI->getArgOperand(0), CI->getArgOperand(1),
2736                                  CI->getArgOperand(4) });
2737       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2738                           CI->getArgOperand(2));
2739     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2740       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2741                                                          Intrinsic::ctlz,
2742                                                          CI->getType()),
2743                                { CI->getArgOperand(0), Builder.getInt1(false) });
2744       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2745                           CI->getArgOperand(1));
2746     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2747       bool IsImmediate = Name[16] == 'i' ||
2748                          (Name.size() > 18 && Name[18] == 'i');
2749       bool IsVariable = Name[16] == 'v';
2750       char Size = Name[16] == '.' ? Name[17] :
2751                   Name[17] == '.' ? Name[18] :
2752                   Name[18] == '.' ? Name[19] :
2753                                     Name[20];
2754 
2755       Intrinsic::ID IID;
2756       if (IsVariable && Name[17] != '.') {
2757         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2758           IID = Intrinsic::x86_avx2_psllv_q;
2759         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2760           IID = Intrinsic::x86_avx2_psllv_q_256;
2761         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2762           IID = Intrinsic::x86_avx2_psllv_d;
2763         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2764           IID = Intrinsic::x86_avx2_psllv_d_256;
2765         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2766           IID = Intrinsic::x86_avx512_psllv_w_128;
2767         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2768           IID = Intrinsic::x86_avx512_psllv_w_256;
2769         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2770           IID = Intrinsic::x86_avx512_psllv_w_512;
2771         else
2772           llvm_unreachable("Unexpected size");
2773       } else if (Name.endswith(".128")) {
2774         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2775           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2776                             : Intrinsic::x86_sse2_psll_d;
2777         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2778           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2779                             : Intrinsic::x86_sse2_psll_q;
2780         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2781           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2782                             : Intrinsic::x86_sse2_psll_w;
2783         else
2784           llvm_unreachable("Unexpected size");
2785       } else if (Name.endswith(".256")) {
2786         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2787           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2788                             : Intrinsic::x86_avx2_psll_d;
2789         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2790           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2791                             : Intrinsic::x86_avx2_psll_q;
2792         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2793           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2794                             : Intrinsic::x86_avx2_psll_w;
2795         else
2796           llvm_unreachable("Unexpected size");
2797       } else {
2798         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2799           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2800                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
2801                               Intrinsic::x86_avx512_psll_d_512;
2802         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2803           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2804                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
2805                               Intrinsic::x86_avx512_psll_q_512;
2806         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2807           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2808                             : Intrinsic::x86_avx512_psll_w_512;
2809         else
2810           llvm_unreachable("Unexpected size");
2811       }
2812 
2813       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2814     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2815       bool IsImmediate = Name[16] == 'i' ||
2816                          (Name.size() > 18 && Name[18] == 'i');
2817       bool IsVariable = Name[16] == 'v';
2818       char Size = Name[16] == '.' ? Name[17] :
2819                   Name[17] == '.' ? Name[18] :
2820                   Name[18] == '.' ? Name[19] :
2821                                     Name[20];
2822 
2823       Intrinsic::ID IID;
2824       if (IsVariable && Name[17] != '.') {
2825         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2826           IID = Intrinsic::x86_avx2_psrlv_q;
2827         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2828           IID = Intrinsic::x86_avx2_psrlv_q_256;
2829         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2830           IID = Intrinsic::x86_avx2_psrlv_d;
2831         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2832           IID = Intrinsic::x86_avx2_psrlv_d_256;
2833         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2834           IID = Intrinsic::x86_avx512_psrlv_w_128;
2835         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2836           IID = Intrinsic::x86_avx512_psrlv_w_256;
2837         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2838           IID = Intrinsic::x86_avx512_psrlv_w_512;
2839         else
2840           llvm_unreachable("Unexpected size");
2841       } else if (Name.endswith(".128")) {
2842         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2843           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2844                             : Intrinsic::x86_sse2_psrl_d;
2845         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2846           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2847                             : Intrinsic::x86_sse2_psrl_q;
2848         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2849           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2850                             : Intrinsic::x86_sse2_psrl_w;
2851         else
2852           llvm_unreachable("Unexpected size");
2853       } else if (Name.endswith(".256")) {
2854         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2855           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2856                             : Intrinsic::x86_avx2_psrl_d;
2857         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2858           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2859                             : Intrinsic::x86_avx2_psrl_q;
2860         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2861           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2862                             : Intrinsic::x86_avx2_psrl_w;
2863         else
2864           llvm_unreachable("Unexpected size");
2865       } else {
2866         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2867           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2868                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
2869                               Intrinsic::x86_avx512_psrl_d_512;
2870         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2871           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2872                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
2873                               Intrinsic::x86_avx512_psrl_q_512;
2874         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2875           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2876                             : Intrinsic::x86_avx512_psrl_w_512;
2877         else
2878           llvm_unreachable("Unexpected size");
2879       }
2880 
2881       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2882     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2883       bool IsImmediate = Name[16] == 'i' ||
2884                          (Name.size() > 18 && Name[18] == 'i');
2885       bool IsVariable = Name[16] == 'v';
2886       char Size = Name[16] == '.' ? Name[17] :
2887                   Name[17] == '.' ? Name[18] :
2888                   Name[18] == '.' ? Name[19] :
2889                                     Name[20];
2890 
2891       Intrinsic::ID IID;
2892       if (IsVariable && Name[17] != '.') {
2893         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2894           IID = Intrinsic::x86_avx2_psrav_d;
2895         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2896           IID = Intrinsic::x86_avx2_psrav_d_256;
2897         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2898           IID = Intrinsic::x86_avx512_psrav_w_128;
2899         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2900           IID = Intrinsic::x86_avx512_psrav_w_256;
2901         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2902           IID = Intrinsic::x86_avx512_psrav_w_512;
2903         else
2904           llvm_unreachable("Unexpected size");
2905       } else if (Name.endswith(".128")) {
2906         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2907           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2908                             : Intrinsic::x86_sse2_psra_d;
2909         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2910           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2911                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
2912                               Intrinsic::x86_avx512_psra_q_128;
2913         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2914           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2915                             : Intrinsic::x86_sse2_psra_w;
2916         else
2917           llvm_unreachable("Unexpected size");
2918       } else if (Name.endswith(".256")) {
2919         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2920           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2921                             : Intrinsic::x86_avx2_psra_d;
2922         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2923           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2924                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
2925                               Intrinsic::x86_avx512_psra_q_256;
2926         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2927           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2928                             : Intrinsic::x86_avx2_psra_w;
2929         else
2930           llvm_unreachable("Unexpected size");
2931       } else {
2932         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2933           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2934                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
2935                               Intrinsic::x86_avx512_psra_d_512;
2936         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2937           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2938                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
2939                               Intrinsic::x86_avx512_psra_q_512;
2940         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2941           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2942                             : Intrinsic::x86_avx512_psra_w_512;
2943         else
2944           llvm_unreachable("Unexpected size");
2945       }
2946 
2947       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2948     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2949       Rep = upgradeMaskedMove(Builder, *CI);
2950     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2951       Rep = UpgradeMaskToInt(Builder, *CI);
2952     } else if (IsX86 && Name.endswith(".movntdqa")) {
2953       Module *M = F->getParent();
2954       MDNode *Node = MDNode::get(
2955           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2956 
2957       Value *Ptr = CI->getArgOperand(0);
2958       VectorType *VTy = cast<VectorType>(CI->getType());
2959 
2960       // Convert the type of the pointer to a pointer to the stored type.
2961       Value *BC =
2962           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2963       LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
2964       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2965       Rep = LI;
2966     } else if (IsX86 &&
2967                (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2968                 Name.startswith("avx512.mask.pavg"))) {
2969       // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2970       // llvm.x86.avx512.mask.pavg.b/w
2971       Value *A = CI->getArgOperand(0);
2972       Value *B = CI->getArgOperand(1);
2973       VectorType *ZextType = VectorType::getExtendedElementVectorType(
2974           cast<VectorType>(A->getType()));
2975       Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2976       Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2977       Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2978       Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2979       Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2980       Rep = Builder.CreateTrunc(ShiftR, A->getType());
2981       if (CI->getNumArgOperands() > 2) {
2982         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2983                             CI->getArgOperand(2));
2984       }
2985     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2986                          Name.startswith("fma.vfmsub.") ||
2987                          Name.startswith("fma.vfnmadd.") ||
2988                          Name.startswith("fma.vfnmsub."))) {
2989       bool NegMul = Name[6] == 'n';
2990       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2991       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
2992 
2993       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2994                        CI->getArgOperand(2) };
2995 
2996       if (IsScalar) {
2997         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2998         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2999         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3000       }
3001 
3002       if (NegMul && !IsScalar)
3003         Ops[0] = Builder.CreateFNeg(Ops[0]);
3004       if (NegMul && IsScalar)
3005         Ops[1] = Builder.CreateFNeg(Ops[1]);
3006       if (NegAcc)
3007         Ops[2] = Builder.CreateFNeg(Ops[2]);
3008 
3009       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3010                                                          Intrinsic::fma,
3011                                                          Ops[0]->getType()),
3012                                Ops);
3013 
3014       if (IsScalar)
3015         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3016                                           (uint64_t)0);
3017     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3018       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3019                        CI->getArgOperand(2) };
3020 
3021       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3022       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3023       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3024 
3025       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3026                                                          Intrinsic::fma,
3027                                                          Ops[0]->getType()),
3028                                Ops);
3029 
3030       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3031                                         Rep, (uint64_t)0);
3032     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3033                          Name.startswith("avx512.maskz.vfmadd.s") ||
3034                          Name.startswith("avx512.mask3.vfmadd.s") ||
3035                          Name.startswith("avx512.mask3.vfmsub.s") ||
3036                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3037       bool IsMask3 = Name[11] == '3';
3038       bool IsMaskZ = Name[11] == 'z';
      // Drop the leading "avx512.mask." (or "avx512.mask3."/"avx512.maskz.")
      // prefix so the remaining name is easier to index.
3040       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3041       bool NegMul = Name[2] == 'n';
3042       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3043 
3044       Value *A = CI->getArgOperand(0);
3045       Value *B = CI->getArgOperand(1);
3046       Value *C = CI->getArgOperand(2);
3047 
3048       if (NegMul && (IsMask3 || IsMaskZ))
3049         A = Builder.CreateFNeg(A);
3050       if (NegMul && !(IsMask3 || IsMaskZ))
3051         B = Builder.CreateFNeg(B);
3052       if (NegAcc)
3053         C = Builder.CreateFNeg(C);
3054 
3055       A = Builder.CreateExtractElement(A, (uint64_t)0);
3056       B = Builder.CreateExtractElement(B, (uint64_t)0);
3057       C = Builder.CreateExtractElement(C, (uint64_t)0);
3058 
3059       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3060           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3061         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3062 
3063         Intrinsic::ID IID;
3064         if (Name.back() == 'd')
3065           IID = Intrinsic::x86_avx512_vfmadd_f64;
3066         else
3067           IID = Intrinsic::x86_avx512_vfmadd_f32;
3068         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3069         Rep = Builder.CreateCall(FMA, Ops);
3070       } else {
3071         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3072                                                   Intrinsic::fma,
3073                                                   A->getType());
3074         Rep = Builder.CreateCall(FMA, { A, B, C });
3075       }
3076 
3077       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3078                         IsMask3 ? C : A;
3079 
3080       // For Mask3 with NegAcc, we need to create a new extractelement that
3081       // avoids the negation above.
3082       if (NegAcc && IsMask3)
3083         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3084                                                 (uint64_t)0);
3085 
3086       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3087                                 Rep, PassThru);
3088       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3089                                         Rep, (uint64_t)0);
3090     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3091                          Name.startswith("avx512.mask.vfnmadd.p") ||
3092                          Name.startswith("avx512.mask.vfnmsub.p") ||
3093                          Name.startswith("avx512.mask3.vfmadd.p") ||
3094                          Name.startswith("avx512.mask3.vfmsub.p") ||
3095                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3096                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3097       bool IsMask3 = Name[11] == '3';
3098       bool IsMaskZ = Name[11] == 'z';
      // Drop the leading "avx512.mask." (or "avx512.mask3."/"avx512.maskz.")
      // prefix so the remaining name is easier to index.
3100       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3101       bool NegMul = Name[2] == 'n';
3102       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3103 
3104       Value *A = CI->getArgOperand(0);
3105       Value *B = CI->getArgOperand(1);
3106       Value *C = CI->getArgOperand(2);
3107 
3108       if (NegMul && (IsMask3 || IsMaskZ))
3109         A = Builder.CreateFNeg(A);
3110       if (NegMul && !(IsMask3 || IsMaskZ))
3111         B = Builder.CreateFNeg(B);
3112       if (NegAcc)
3113         C = Builder.CreateFNeg(C);
3114 
3115       if (CI->getNumArgOperands() == 5 &&
3116           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3117            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3118         Intrinsic::ID IID;
3119         // Check the character before ".512" in string.
3120         if (Name[Name.size()-5] == 's')
3121           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3122         else
3123           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3124 
3125         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3126                                  { A, B, C, CI->getArgOperand(4) });
3127       } else {
3128         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3129                                                   Intrinsic::fma,
3130                                                   A->getType());
3131         Rep = Builder.CreateCall(FMA, { A, B, C });
3132       }
3133 
3134       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3135                         IsMask3 ? CI->getArgOperand(2) :
3136                                   CI->getArgOperand(0);
3137 
3138       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3139     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3140                          Name.startswith("fma.vfmsubadd.p"))) {
3141       bool IsSubAdd = Name[7] == 's';
3142       int NumElts = CI->getType()->getVectorNumElements();
3143 
3144       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3145                        CI->getArgOperand(2) };
3146 
3147       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3148                                                 Ops[0]->getType());
3149       Value *Odd = Builder.CreateCall(FMA, Ops);
3150       Ops[2] = Builder.CreateFNeg(Ops[2]);
3151       Value *Even = Builder.CreateCall(FMA, Ops);
3152 
3153       if (IsSubAdd)
3154         std::swap(Even, Odd);
3155 
3156       SmallVector<uint32_t, 32> Idxs(NumElts);
3157       for (int i = 0; i != NumElts; ++i)
3158         Idxs[i] = i + (i % 2) * NumElts;
3159 
3160       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3161     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3162                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3163                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3164                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3165       bool IsMask3 = Name[11] == '3';
3166       bool IsMaskZ = Name[11] == 'z';
3167       // Drop the "avx512.mask." to make it easier.
3168       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3169       bool IsSubAdd = Name[3] == 's';
3170       if (CI->getNumArgOperands() == 5 &&
3171           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3172            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3173         Intrinsic::ID IID;
3174         // Check the character before ".512" in string.
3175         if (Name[Name.size()-5] == 's')
3176           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3177         else
3178           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3179 
3180         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3181                          CI->getArgOperand(2), CI->getArgOperand(4) };
3182         if (IsSubAdd)
3183           Ops[2] = Builder.CreateFNeg(Ops[2]);
3184 
3185         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3186                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3187                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3188       } else {
3189         int NumElts = CI->getType()->getVectorNumElements();
3190 
3191         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3192                          CI->getArgOperand(2) };
3193 
3194         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3195                                                   Ops[0]->getType());
3196         Value *Odd = Builder.CreateCall(FMA, Ops);
3197         Ops[2] = Builder.CreateFNeg(Ops[2]);
3198         Value *Even = Builder.CreateCall(FMA, Ops);
3199 
3200         if (IsSubAdd)
3201           std::swap(Even, Odd);
3202 
3203         SmallVector<uint32_t, 32> Idxs(NumElts);
3204         for (int i = 0; i != NumElts; ++i)
3205           Idxs[i] = i + (i % 2) * NumElts;
3206 
3207         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3208       }
3209 
3210       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3211                         IsMask3 ? CI->getArgOperand(2) :
3212                                   CI->getArgOperand(0);
3213 
3214       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3215     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3216                          Name.startswith("avx512.maskz.pternlog."))) {
3217       bool ZeroMask = Name[11] == 'z';
3218       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3219       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3220       Intrinsic::ID IID;
3221       if (VecWidth == 128 && EltWidth == 32)
3222         IID = Intrinsic::x86_avx512_pternlog_d_128;
3223       else if (VecWidth == 256 && EltWidth == 32)
3224         IID = Intrinsic::x86_avx512_pternlog_d_256;
3225       else if (VecWidth == 512 && EltWidth == 32)
3226         IID = Intrinsic::x86_avx512_pternlog_d_512;
3227       else if (VecWidth == 128 && EltWidth == 64)
3228         IID = Intrinsic::x86_avx512_pternlog_q_128;
3229       else if (VecWidth == 256 && EltWidth == 64)
3230         IID = Intrinsic::x86_avx512_pternlog_q_256;
3231       else if (VecWidth == 512 && EltWidth == 64)
3232         IID = Intrinsic::x86_avx512_pternlog_q_512;
3233       else
3234         llvm_unreachable("Unexpected intrinsic");
3235 
3236       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3237                         CI->getArgOperand(2), CI->getArgOperand(3) };
3238       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3239                                Args);
3240       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3241                                  : CI->getArgOperand(0);
3242       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3243     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3244                          Name.startswith("avx512.maskz.vpmadd52"))) {
3245       bool ZeroMask = Name[11] == 'z';
3246       bool High = Name[20] == 'h' || Name[21] == 'h';
3247       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3248       Intrinsic::ID IID;
3249       if (VecWidth == 128 && !High)
3250         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3251       else if (VecWidth == 256 && !High)
3252         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3253       else if (VecWidth == 512 && !High)
3254         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3255       else if (VecWidth == 128 && High)
3256         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3257       else if (VecWidth == 256 && High)
3258         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3259       else if (VecWidth == 512 && High)
3260         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3261       else
3262         llvm_unreachable("Unexpected intrinsic");
3263 
3264       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3265                         CI->getArgOperand(2) };
3266       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3267                                Args);
3268       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3269                                  : CI->getArgOperand(0);
3270       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3271     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3272                          Name.startswith("avx512.mask.vpermt2var.") ||
3273                          Name.startswith("avx512.maskz.vpermt2var."))) {
3274       bool ZeroMask = Name[11] == 'z';
3275       bool IndexForm = Name[17] == 'i';
3276       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3277     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3278                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3279                          Name.startswith("avx512.mask.vpdpbusds.") ||
3280                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3281       bool ZeroMask = Name[11] == 'z';
3282       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3283       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3284       Intrinsic::ID IID;
3285       if (VecWidth == 128 && !IsSaturating)
3286         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3287       else if (VecWidth == 256 && !IsSaturating)
3288         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3289       else if (VecWidth == 512 && !IsSaturating)
3290         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3291       else if (VecWidth == 128 && IsSaturating)
3292         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3293       else if (VecWidth == 256 && IsSaturating)
3294         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3295       else if (VecWidth == 512 && IsSaturating)
3296         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3297       else
3298         llvm_unreachable("Unexpected intrinsic");
3299 
3300       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3301                         CI->getArgOperand(2)  };
3302       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3303                                Args);
3304       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3305                                  : CI->getArgOperand(0);
3306       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3307     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3308                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3309                          Name.startswith("avx512.mask.vpdpwssds.") ||
3310                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3311       bool ZeroMask = Name[11] == 'z';
3312       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3313       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3314       Intrinsic::ID IID;
3315       if (VecWidth == 128 && !IsSaturating)
3316         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3317       else if (VecWidth == 256 && !IsSaturating)
3318         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3319       else if (VecWidth == 512 && !IsSaturating)
3320         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3321       else if (VecWidth == 128 && IsSaturating)
3322         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3323       else if (VecWidth == 256 && IsSaturating)
3324         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3325       else if (VecWidth == 512 && IsSaturating)
3326         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3327       else
3328         llvm_unreachable("Unexpected intrinsic");
3329 
3330       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3331                         CI->getArgOperand(2)  };
3332       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3333                                Args);
3334       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3335                                  : CI->getArgOperand(0);
3336       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3337     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3338                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3339                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3340       Intrinsic::ID IID;
3341       if (Name[0] == 'a' && Name.back() == '2')
3342         IID = Intrinsic::x86_addcarry_32;
3343       else if (Name[0] == 'a' && Name.back() == '4')
3344         IID = Intrinsic::x86_addcarry_64;
3345       else if (Name[0] == 's' && Name.back() == '2')
3346         IID = Intrinsic::x86_subborrow_32;
3347       else if (Name[0] == 's' && Name.back() == '4')
3348         IID = Intrinsic::x86_subborrow_64;
3349       else
3350         llvm_unreachable("Unexpected intrinsic");
3351 
3352       // Make a call with 3 operands.
3353       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3354                         CI->getArgOperand(2)};
3355       Value *NewCall = Builder.CreateCall(
3356                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3357                                 Args);
3358 
3359       // Extract the second result and store it.
3360       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3361       // Cast the pointer to the right type.
3362       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3363                                  llvm::PointerType::getUnqual(Data->getType()));
3364       Builder.CreateAlignedStore(Data, Ptr, 1);
3365       // Replace the original call result with the first result of the new call.
3366       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3367 
3368       CI->replaceAllUsesWith(CF);
3369       Rep = nullptr;
3370     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3371                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3372       // Rep will be updated by the call in the condition.
3373     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3374       Value *Arg = CI->getArgOperand(0);
3375       Value *Neg = Builder.CreateNeg(Arg, "neg");
3376       Value *Cmp = Builder.CreateICmpSGE(
3377           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3378       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3379     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3380                           Name == "max.ui" || Name == "max.ull")) {
3381       Value *Arg0 = CI->getArgOperand(0);
3382       Value *Arg1 = CI->getArgOperand(1);
3383       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3384                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3385                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3386       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3387     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3388                           Name == "min.ui" || Name == "min.ull")) {
3389       Value *Arg0 = CI->getArgOperand(0);
3390       Value *Arg1 = CI->getArgOperand(1);
3391       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3392                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3393                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3394       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3395     } else if (IsNVVM && Name == "clz.ll") {
3396       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3397       Value *Arg = CI->getArgOperand(0);
3398       Value *Ctlz = Builder.CreateCall(
3399           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3400                                     {Arg->getType()}),
3401           {Arg, Builder.getFalse()}, "ctlz");
3402       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3403     } else if (IsNVVM && Name == "popc.ll") {
3404       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3405       // i64.
3406       Value *Arg = CI->getArgOperand(0);
3407       Value *Popc = Builder.CreateCall(
3408           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3409                                     {Arg->getType()}),
3410           Arg, "ctpop");
3411       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3412     } else if (IsNVVM && Name == "h2f") {
3413       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3414                                    F->getParent(), Intrinsic::convert_from_fp16,
3415                                    {Builder.getFloatTy()}),
3416                                CI->getArgOperand(0), "h2f");
3417     } else {
3418       llvm_unreachable("Unknown function for CallInst upgrade.");
3419     }
3420 
3421     if (Rep)
3422       CI->replaceAllUsesWith(Rep);
3423     CI->eraseFromParent();
3424     return;
3425   }
3426 
3427   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3428     // Handle generic mangling change, but nothing else
3429     assert(
3430         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3431         "Unknown function for CallInst upgrade and isn't just a name change");
3432     CI->setCalledFunction(NewFn);
3433   };
3434   CallInst *NewCall = nullptr;
3435   switch (NewFn->getIntrinsicID()) {
3436   default: {
3437     DefaultCase();
3438     return;
3439   }
3440 
3441   case Intrinsic::arm_neon_vld1:
3442   case Intrinsic::arm_neon_vld2:
3443   case Intrinsic::arm_neon_vld3:
3444   case Intrinsic::arm_neon_vld4:
3445   case Intrinsic::arm_neon_vld2lane:
3446   case Intrinsic::arm_neon_vld3lane:
3447   case Intrinsic::arm_neon_vld4lane:
3448   case Intrinsic::arm_neon_vst1:
3449   case Intrinsic::arm_neon_vst2:
3450   case Intrinsic::arm_neon_vst3:
3451   case Intrinsic::arm_neon_vst4:
3452   case Intrinsic::arm_neon_vst2lane:
3453   case Intrinsic::arm_neon_vst3lane:
3454   case Intrinsic::arm_neon_vst4lane: {
3455     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3456                                  CI->arg_operands().end());
3457     NewCall = Builder.CreateCall(NewFn, Args);
3458     break;
3459   }
3460 
3461   case Intrinsic::bitreverse:
3462     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3463     break;
3464 
3465   case Intrinsic::ctlz:
3466   case Intrinsic::cttz:
3467     assert(CI->getNumArgOperands() == 1 &&
3468            "Mismatch between function args and call args");
3469     NewCall =
3470         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3471     break;
3472 
3473   case Intrinsic::objectsize: {
3474     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3475                                    ? Builder.getFalse()
3476                                    : CI->getArgOperand(2);
3477     Value *Dynamic =
3478         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3479     NewCall = Builder.CreateCall(
3480         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3481     break;
3482   }
3483 
3484   case Intrinsic::ctpop:
3485     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3486     break;
3487 
3488   case Intrinsic::convert_from_fp16:
3489     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3490     break;
3491 
3492   case Intrinsic::dbg_value:
3493     // Upgrade from the old version that had an extra offset argument.
3494     assert(CI->getNumArgOperands() == 4);
3495     // Drop nonzero offsets instead of attempting to upgrade them.
3496     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3497       if (Offset->isZeroValue()) {
3498         NewCall = Builder.CreateCall(
3499             NewFn,
3500             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3501         break;
3502       }
3503     CI->eraseFromParent();
3504     return;
3505 
3506   case Intrinsic::x86_xop_vfrcz_ss:
3507   case Intrinsic::x86_xop_vfrcz_sd:
3508     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3509     break;
3510 
3511   case Intrinsic::x86_xop_vpermil2pd:
3512   case Intrinsic::x86_xop_vpermil2ps:
3513   case Intrinsic::x86_xop_vpermil2pd_256:
3514   case Intrinsic::x86_xop_vpermil2ps_256: {
3515     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3516                                  CI->arg_operands().end());
3517     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3518     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3519     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3520     NewCall = Builder.CreateCall(NewFn, Args);
3521     break;
3522   }
3523 
3524   case Intrinsic::x86_sse41_ptestc:
3525   case Intrinsic::x86_sse41_ptestz:
3526   case Intrinsic::x86_sse41_ptestnzc: {
3527     // The arguments for these intrinsics used to be v4f32, and changed
3528     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3529     // So, the only thing required is a bitcast for both arguments.
3530     // First, check the arguments have the old type.
3531     Value *Arg0 = CI->getArgOperand(0);
3532     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3533       return;
3534 
3535     // Old intrinsic, add bitcasts
3536     Value *Arg1 = CI->getArgOperand(1);
3537 
3538     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3539 
3540     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3541     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3542 
3543     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3544     break;
3545   }
3546 
3547   case Intrinsic::x86_rdtscp: {
3548     // This used to take 1 arguments. If we have no arguments, it is already
3549     // upgraded.
3550     if (CI->getNumOperands() == 0)
3551       return;
3552 
3553     NewCall = Builder.CreateCall(NewFn);
3554     // Extract the second result and store it.
3555     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3556     // Cast the pointer to the right type.
3557     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3558                                  llvm::PointerType::getUnqual(Data->getType()));
3559     Builder.CreateAlignedStore(Data, Ptr, 1);
3560     // Replace the original call result with the first result of the new call.
3561     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3562 
3563     std::string Name = CI->getName();
3564     if (!Name.empty()) {
3565       CI->setName(Name + ".old");
3566       NewCall->setName(Name);
3567     }
3568     CI->replaceAllUsesWith(TSC);
3569     CI->eraseFromParent();
3570     return;
3571   }
3572 
3573   case Intrinsic::x86_sse41_insertps:
3574   case Intrinsic::x86_sse41_dppd:
3575   case Intrinsic::x86_sse41_dpps:
3576   case Intrinsic::x86_sse41_mpsadbw:
3577   case Intrinsic::x86_avx_dp_ps_256:
3578   case Intrinsic::x86_avx2_mpsadbw: {
3579     // Need to truncate the last argument from i32 to i8 -- this argument models
3580     // an inherently 8-bit immediate operand to these x86 instructions.
3581     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3582                                  CI->arg_operands().end());
3583 
3584     // Replace the last argument with a trunc.
3585     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3586     NewCall = Builder.CreateCall(NewFn, Args);
3587     break;
3588   }
3589 
3590   case Intrinsic::thread_pointer: {
3591     NewCall = Builder.CreateCall(NewFn, {});
3592     break;
3593   }
3594 
3595   case Intrinsic::invariant_start:
3596   case Intrinsic::invariant_end:
3597   case Intrinsic::masked_load:
3598   case Intrinsic::masked_store:
3599   case Intrinsic::masked_gather:
3600   case Intrinsic::masked_scatter: {
3601     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3602                                  CI->arg_operands().end());
3603     NewCall = Builder.CreateCall(NewFn, Args);
3604     break;
3605   }
3606 
3607   case Intrinsic::memcpy:
3608   case Intrinsic::memmove:
3609   case Intrinsic::memset: {
3610     // We have to make sure that the call signature is what we're expecting.
3611     // We only want to change the old signatures by removing the alignment arg:
3612     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3613     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3614     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3615     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3616     // Note: i8*'s in the above can be any pointer type
3617     if (CI->getNumArgOperands() != 5) {
3618       DefaultCase();
3619       return;
3620     }
3621     // Remove alignment argument (3), and add alignment attributes to the
3622     // dest/src pointers.
3623     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3624                       CI->getArgOperand(2), CI->getArgOperand(4)};
3625     NewCall = Builder.CreateCall(NewFn, Args);
3626     auto *MemCI = cast<MemIntrinsic>(NewCall);
3627     // All mem intrinsics support dest alignment.
3628     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3629     MemCI->setDestAlignment(Align->getZExtValue());
3630     // Memcpy/Memmove also support source alignment.
3631     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3632       MTI->setSourceAlignment(Align->getZExtValue());
3633     break;
3634   }
3635   }
3636   assert(NewCall && "Should have either set this variable or returned through "
3637                     "the default case");
3638   std::string Name = CI->getName();
3639   if (!Name.empty()) {
3640     CI->setName(Name + ".old");
3641     NewCall->setName(Name);
3642   }
3643   CI->replaceAllUsesWith(NewCall);
3644   CI->eraseFromParent();
3645 }
3646 
3647 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3648   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3649 
3650   // Check if this function should be upgraded and get the replacement function
3651   // if there is one.
3652   Function *NewFn;
3653   if (UpgradeIntrinsicFunction(F, NewFn)) {
3654     // Replace all users of the old function with the new function or new
3655     // instructions. This is not a range loop because the call is deleted.
3656     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3657       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3658         UpgradeIntrinsicCall(CI, NewFn);
3659 
3660     // Remove old function, no longer used, from the module.
3661     F->eraseFromParent();
3662   }
3663 }
3664 
3665 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3666   // Check if the tag uses struct-path aware TBAA format.
3667   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3668     return &MD;
3669 
3670   auto &Context = MD.getContext();
3671   if (MD.getNumOperands() == 3) {
3672     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3673     MDNode *ScalarType = MDNode::get(Context, Elts);
3674     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3675     Metadata *Elts2[] = {ScalarType, ScalarType,
3676                          ConstantAsMetadata::get(
3677                              Constant::getNullValue(Type::getInt64Ty(Context))),
3678                          MD.getOperand(2)};
3679     return MDNode::get(Context, Elts2);
3680   }
3681   // Create a MDNode <MD, MD, offset 0>
3682   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3683                                     Type::getInt64Ty(Context)))};
3684   return MDNode::get(Context, Elts);
3685 }
3686 
3687 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3688                                       Instruction *&Temp) {
3689   if (Opc != Instruction::BitCast)
3690     return nullptr;
3691 
3692   Temp = nullptr;
3693   Type *SrcTy = V->getType();
3694   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3695       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3696     LLVMContext &Context = V->getContext();
3697 
3698     // We have no information about target data layout, so we assume that
3699     // the maximum pointer size is 64bit.
3700     Type *MidTy = Type::getInt64Ty(Context);
3701     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3702 
3703     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3704   }
3705 
3706   return nullptr;
3707 }
3708 
3709 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3710   if (Opc != Instruction::BitCast)
3711     return nullptr;
3712 
3713   Type *SrcTy = C->getType();
3714   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3715       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3716     LLVMContext &Context = C->getContext();
3717 
3718     // We have no information about target data layout, so we assume that
3719     // the maximum pointer size is 64bit.
3720     Type *MidTy = Type::getInt64Ty(Context);
3721 
3722     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3723                                      DestTy);
3724   }
3725 
3726   return nullptr;
3727 }
3728 
3729 /// Check the debug info version number, if it is out-dated, drop the debug
3730 /// info. Return true if module is modified.
3731 bool llvm::UpgradeDebugInfo(Module &M) {
3732   unsigned Version = getDebugMetadataVersionFromModule(M);
3733   if (Version == DEBUG_METADATA_VERSION) {
3734     bool BrokenDebugInfo = false;
3735     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3736       report_fatal_error("Broken module found, compilation aborted!");
3737     if (!BrokenDebugInfo)
3738       // Everything is ok.
3739       return false;
3740     else {
3741       // Diagnose malformed debug info.
3742       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3743       M.getContext().diagnose(Diag);
3744     }
3745   }
3746   bool Modified = StripDebugInfo(M);
3747   if (Modified && Version != DEBUG_METADATA_VERSION) {
3748     // Diagnose a version mismatch.
3749     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3750     M.getContext().diagnose(DiagVersion);
3751   }
3752   return Modified;
3753 }
3754 
3755 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3756   bool Changed = false;
3757   NamedMDNode *ModRetainReleaseMarker =
3758       M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3759   if (ModRetainReleaseMarker) {
3760     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3761     if (Op) {
3762       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3763       if (ID) {
3764         SmallVector<StringRef, 4> ValueComp;
3765         ID->getString().split(ValueComp, "#");
3766         if (ValueComp.size() == 2) {
3767           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3768           Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3769           ModRetainReleaseMarker->setOperand(0,
3770                                              MDNode::get(M.getContext(), Ops));
3771           Changed = true;
3772         }
3773       }
3774     }
3775   }
3776   return Changed;
3777 }
3778 
3779 bool llvm::UpgradeModuleFlags(Module &M) {
3780   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3781   if (!ModFlags)
3782     return false;
3783 
3784   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3785   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
3786     MDNode *Op = ModFlags->getOperand(I);
3787     if (Op->getNumOperands() != 3)
3788       continue;
3789     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3790     if (!ID)
3791       continue;
3792     if (ID->getString() == "Objective-C Image Info Version")
3793       HasObjCFlag = true;
3794     if (ID->getString() == "Objective-C Class Properties")
3795       HasClassProperties = true;
3796     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3797     // field was Error and now they are Max.
3798     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3799       if (auto *Behavior =
3800               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3801         if (Behavior->getLimitedValue() == Module::Error) {
3802           Type *Int32Ty = Type::getInt32Ty(M.getContext());
3803           Metadata *Ops[3] = {
3804               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
3805               MDString::get(M.getContext(), ID->getString()),
3806               Op->getOperand(2)};
3807           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3808           Changed = true;
3809         }
3810       }
3811     }
3812     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
3813     // section name so that llvm-lto will not complain about mismatching
3814     // module flags that is functionally the same.
3815     if (ID->getString() == "Objective-C Image Info Section") {
3816       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3817         SmallVector<StringRef, 4> ValueComp;
3818         Value->getString().split(ValueComp, " ");
3819         if (ValueComp.size() != 1) {
3820           std::string NewValue;
3821           for (auto &S : ValueComp)
3822             NewValue += S.str();
3823           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3824                               MDString::get(M.getContext(), NewValue)};
3825           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3826           Changed = true;
3827         }
3828       }
3829     }
3830   }
3831 
  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
3837   if (HasObjCFlag && !HasClassProperties) {
3838     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3839                     (uint32_t)0);
3840     Changed = true;
3841   }
3842 
3843   return Changed;
3844 }
3845 
3846 void llvm::UpgradeSectionAttributes(Module &M) {
3847   auto TrimSpaces = [](StringRef Section) -> std::string {
3848     SmallVector<StringRef, 5> Components;
3849     Section.split(Components, ',');
3850 
3851     SmallString<32> Buffer;
3852     raw_svector_ostream OS(Buffer);
3853 
3854     for (auto Component : Components)
3855       OS << ',' << Component.trim();
3856 
3857     return OS.str().substr(1);
3858   };
3859 
3860   for (auto &GV : M.globals()) {
3861     if (!GV.hasSection())
3862       continue;
3863 
3864     StringRef Section = GV.getSection();
3865 
3866     if (!Section.startswith("__DATA, __objc_catlist"))
3867       continue;
3868 
3869     // __DATA, __objc_catlist, regular, no_dead_strip
3870     // __DATA,__objc_catlist,regular,no_dead_strip
3871     GV.setSection(TrimSpaces(Section));
3872   }
3873 }
3874 
3875 static bool isOldLoopArgument(Metadata *MD) {
3876   auto *T = dyn_cast_or_null<MDTuple>(MD);
3877   if (!T)
3878     return false;
3879   if (T->getNumOperands() < 1)
3880     return false;
3881   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3882   if (!S)
3883     return false;
3884   return S->getString().startswith("llvm.vectorizer.");
3885 }
3886 
3887 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3888   StringRef OldPrefix = "llvm.vectorizer.";
3889   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3890 
3891   if (OldTag == "llvm.vectorizer.unroll")
3892     return MDString::get(C, "llvm.loop.interleave.count");
3893 
3894   return MDString::get(
3895       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3896              .str());
3897 }
3898 
3899 static Metadata *upgradeLoopArgument(Metadata *MD) {
3900   auto *T = dyn_cast_or_null<MDTuple>(MD);
3901   if (!T)
3902     return MD;
3903   if (T->getNumOperands() < 1)
3904     return MD;
3905   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3906   if (!OldTag)
3907     return MD;
3908   if (!OldTag->getString().startswith("llvm.vectorizer."))
3909     return MD;
3910 
3911   // This has an old tag.  Upgrade it.
3912   SmallVector<Metadata *, 8> Ops;
3913   Ops.reserve(T->getNumOperands());
3914   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3915   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3916     Ops.push_back(T->getOperand(I));
3917 
3918   return MDTuple::get(T->getContext(), Ops);
3919 }
3920 
3921 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3922   auto *T = dyn_cast<MDTuple>(&N);
3923   if (!T)
3924     return &N;
3925 
3926   if (none_of(T->operands(), isOldLoopArgument))
3927     return &N;
3928 
3929   SmallVector<Metadata *, 8> Ops;
3930   Ops.reserve(T->getNumOperands());
3931   for (Metadata *MD : T->operands())
3932     Ops.push_back(upgradeLoopArgument(MD));
3933 
3934   return MDTuple::get(T->getContext(), Ops);
3935 }
3936