1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/InstVisitor.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include "llvm/IR/IntrinsicsAArch64.h"
28 #include "llvm/IR/IntrinsicsARM.h"
29 #include "llvm/IR/IntrinsicsX86.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Verifier.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Regex.h"
35 #include <cstring>
36 using namespace llvm;
37 
38 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
39 
40 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
41 // changed their type from v4f32 to v2i64.
42 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
43                                   Function *&NewFn) {
44   // Check whether this is an old version of the function, which received
45   // v4f32 arguments.
46   Type *Arg0Type = F->getFunctionType()->getParamType(0);
47   if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
48     return false;
49 
50   // Yes, it's old, replace it with new version.
51   rename(F);
52   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
53   return true;
54 }
55 
56 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
57 // arguments have changed their type from i32 to i8.
58 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
59                                              Function *&NewFn) {
60   // Check that the last argument is an i32.
61   Type *LastArgType = F->getFunctionType()->getParamType(
62      F->getFunctionType()->getNumParams() - 1);
63   if (!LastArgType->isIntegerTy(32))
64     return false;
65 
66   // Move this function aside and map down.
67   rename(F);
68   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
69   return true;
70 }
71 
72 // Upgrade the declaration of fp compare intrinsics that change return type
73 // from scalar to vXi1 mask.
74 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
75                                       Function *&NewFn) {
76   // Check if the return type is a vector.
77   if (F->getReturnType()->isVectorTy())
78     return false;
79 
80   rename(F);
81   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
82   return true;
83 }
84 
85 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
86   // All of the intrinsics matches below should be marked with which llvm
87   // version started autoupgrading them. At some point in the future we would
88   // like to use this information to remove upgrade code for some older
89   // intrinsics. It is currently undecided how we will determine that future
90   // point.
91   if (Name == "addcarryx.u32" || // Added in 8.0
92       Name == "addcarryx.u64" || // Added in 8.0
93       Name == "addcarry.u32" || // Added in 8.0
94       Name == "addcarry.u64" || // Added in 8.0
95       Name == "subborrow.u32" || // Added in 8.0
96       Name == "subborrow.u64" || // Added in 8.0
97       Name.startswith("sse2.padds.") || // Added in 8.0
98       Name.startswith("sse2.psubs.") || // Added in 8.0
99       Name.startswith("sse2.paddus.") || // Added in 8.0
100       Name.startswith("sse2.psubus.") || // Added in 8.0
101       Name.startswith("avx2.padds.") || // Added in 8.0
102       Name.startswith("avx2.psubs.") || // Added in 8.0
103       Name.startswith("avx2.paddus.") || // Added in 8.0
104       Name.startswith("avx2.psubus.") || // Added in 8.0
105       Name.startswith("avx512.padds.") || // Added in 8.0
106       Name.startswith("avx512.psubs.") || // Added in 8.0
107       Name.startswith("avx512.mask.padds.") || // Added in 8.0
108       Name.startswith("avx512.mask.psubs.") || // Added in 8.0
109       Name.startswith("avx512.mask.paddus.") || // Added in 8.0
110       Name.startswith("avx512.mask.psubus.") || // Added in 8.0
111       Name=="ssse3.pabs.b.128" || // Added in 6.0
112       Name=="ssse3.pabs.w.128" || // Added in 6.0
113       Name=="ssse3.pabs.d.128" || // Added in 6.0
114       Name.startswith("fma4.vfmadd.s") || // Added in 7.0
115       Name.startswith("fma.vfmadd.") || // Added in 7.0
116       Name.startswith("fma.vfmsub.") || // Added in 7.0
117       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
118       Name.startswith("fma.vfnmadd.") || // Added in 7.0
119       Name.startswith("fma.vfnmsub.") || // Added in 7.0
120       Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
121       Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
122       Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
123       Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
124       Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
125       Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
126       Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
127       Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
128       Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
129       Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
130       Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
131       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
132       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
133       Name.startswith("avx512.kunpck") || //added in 6.0
134       Name.startswith("avx2.pabs.") || // Added in 6.0
135       Name.startswith("avx512.mask.pabs.") || // Added in 6.0
136       Name.startswith("avx512.broadcastm") || // Added in 6.0
137       Name == "sse.sqrt.ss" || // Added in 7.0
138       Name == "sse2.sqrt.sd" || // Added in 7.0
139       Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
140       Name.startswith("avx.sqrt.p") || // Added in 7.0
141       Name.startswith("sse2.sqrt.p") || // Added in 7.0
142       Name.startswith("sse.sqrt.p") || // Added in 7.0
143       Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
144       Name.startswith("sse2.pcmpeq.") || // Added in 3.1
145       Name.startswith("sse2.pcmpgt.") || // Added in 3.1
146       Name.startswith("avx2.pcmpeq.") || // Added in 3.1
147       Name.startswith("avx2.pcmpgt.") || // Added in 3.1
148       Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
149       Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
150       Name.startswith("avx.vperm2f128.") || // Added in 6.0
151       Name == "avx2.vperm2i128" || // Added in 6.0
152       Name == "sse.add.ss" || // Added in 4.0
153       Name == "sse2.add.sd" || // Added in 4.0
154       Name == "sse.sub.ss" || // Added in 4.0
155       Name == "sse2.sub.sd" || // Added in 4.0
156       Name == "sse.mul.ss" || // Added in 4.0
157       Name == "sse2.mul.sd" || // Added in 4.0
158       Name == "sse.div.ss" || // Added in 4.0
159       Name == "sse2.div.sd" || // Added in 4.0
160       Name == "sse41.pmaxsb" || // Added in 3.9
161       Name == "sse2.pmaxs.w" || // Added in 3.9
162       Name == "sse41.pmaxsd" || // Added in 3.9
163       Name == "sse2.pmaxu.b" || // Added in 3.9
164       Name == "sse41.pmaxuw" || // Added in 3.9
165       Name == "sse41.pmaxud" || // Added in 3.9
166       Name == "sse41.pminsb" || // Added in 3.9
167       Name == "sse2.pmins.w" || // Added in 3.9
168       Name == "sse41.pminsd" || // Added in 3.9
169       Name == "sse2.pminu.b" || // Added in 3.9
170       Name == "sse41.pminuw" || // Added in 3.9
171       Name == "sse41.pminud" || // Added in 3.9
172       Name == "avx512.kand.w" || // Added in 7.0
173       Name == "avx512.kandn.w" || // Added in 7.0
174       Name == "avx512.knot.w" || // Added in 7.0
175       Name == "avx512.kor.w" || // Added in 7.0
176       Name == "avx512.kxor.w" || // Added in 7.0
177       Name == "avx512.kxnor.w" || // Added in 7.0
178       Name == "avx512.kortestc.w" || // Added in 7.0
179       Name == "avx512.kortestz.w" || // Added in 7.0
180       Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
181       Name.startswith("avx2.pmax") || // Added in 3.9
182       Name.startswith("avx2.pmin") || // Added in 3.9
183       Name.startswith("avx512.mask.pmax") || // Added in 4.0
184       Name.startswith("avx512.mask.pmin") || // Added in 4.0
185       Name.startswith("avx2.vbroadcast") || // Added in 3.8
186       Name.startswith("avx2.pbroadcast") || // Added in 3.8
187       Name.startswith("avx.vpermil.") || // Added in 3.1
188       Name.startswith("sse2.pshuf") || // Added in 3.9
189       Name.startswith("avx512.pbroadcast") || // Added in 3.9
190       Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
191       Name.startswith("avx512.mask.movddup") || // Added in 3.9
192       Name.startswith("avx512.mask.movshdup") || // Added in 3.9
193       Name.startswith("avx512.mask.movsldup") || // Added in 3.9
194       Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
195       Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
196       Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
197       Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
198       Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
199       Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
200       Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
201       Name.startswith("avx512.mask.punpckl") || // Added in 3.9
202       Name.startswith("avx512.mask.punpckh") || // Added in 3.9
203       Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
204       Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
205       Name.startswith("avx512.mask.pand.") || // Added in 3.9
206       Name.startswith("avx512.mask.pandn.") || // Added in 3.9
207       Name.startswith("avx512.mask.por.") || // Added in 3.9
208       Name.startswith("avx512.mask.pxor.") || // Added in 3.9
209       Name.startswith("avx512.mask.and.") || // Added in 3.9
210       Name.startswith("avx512.mask.andn.") || // Added in 3.9
211       Name.startswith("avx512.mask.or.") || // Added in 3.9
212       Name.startswith("avx512.mask.xor.") || // Added in 3.9
213       Name.startswith("avx512.mask.padd.") || // Added in 4.0
214       Name.startswith("avx512.mask.psub.") || // Added in 4.0
215       Name.startswith("avx512.mask.pmull.") || // Added in 4.0
216       Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
217       Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
218       Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
219       Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
220       Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
221       Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
222       Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
223       Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
224       Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
225       Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
226       Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
227       Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
228       Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
229       Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
230       Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
231       Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
232       Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
233       Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
234       Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
235       Name == "avx512.cvtusi2sd" || // Added in 7.0
236       Name.startswith("avx512.mask.permvar.") || // Added in 7.0
237       Name == "sse2.pmulu.dq" || // Added in 7.0
238       Name == "sse41.pmuldq" || // Added in 7.0
239       Name == "avx2.pmulu.dq" || // Added in 7.0
240       Name == "avx2.pmul.dq" || // Added in 7.0
241       Name == "avx512.pmulu.dq.512" || // Added in 7.0
242       Name == "avx512.pmul.dq.512" || // Added in 7.0
243       Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
244       Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
245       Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
246       Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
247       Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
248       Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
249       Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
250       Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
251       Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
252       Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
253       Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
254       Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
255       Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
256       Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
257       Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
258       Name.startswith("avx512.cmp.p") || // Added in 12.0
259       Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
260       Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
261       Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
262       Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
263       Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
264       Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
265       Name.startswith("avx512.mask.psll.d") || // Added in 4.0
266       Name.startswith("avx512.mask.psll.q") || // Added in 4.0
267       Name.startswith("avx512.mask.psll.w") || // Added in 4.0
268       Name.startswith("avx512.mask.psra.d") || // Added in 4.0
269       Name.startswith("avx512.mask.psra.q") || // Added in 4.0
270       Name.startswith("avx512.mask.psra.w") || // Added in 4.0
271       Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
272       Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
273       Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
274       Name.startswith("avx512.mask.pslli") || // Added in 4.0
275       Name.startswith("avx512.mask.psrai") || // Added in 4.0
276       Name.startswith("avx512.mask.psrli") || // Added in 4.0
277       Name.startswith("avx512.mask.psllv") || // Added in 4.0
278       Name.startswith("avx512.mask.psrav") || // Added in 4.0
279       Name.startswith("avx512.mask.psrlv") || // Added in 4.0
280       Name.startswith("sse41.pmovsx") || // Added in 3.8
281       Name.startswith("sse41.pmovzx") || // Added in 3.9
282       Name.startswith("avx2.pmovsx") || // Added in 3.9
283       Name.startswith("avx2.pmovzx") || // Added in 3.9
284       Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
285       Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
286       Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
287       Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
288       Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
289       Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
290       Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
291       Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
292       Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
293       Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
294       Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
295       Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
296       Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
297       Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
298       Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
299       Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
300       Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
301       Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
302       Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
303       Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
304       Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
305       Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
306       Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
307       Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
308       Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
309       Name.startswith("avx512.vpshld.") || // Added in 8.0
310       Name.startswith("avx512.vpshrd.") || // Added in 8.0
311       Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
312       Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
313       Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
314       Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
315       Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
316       Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
317       Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
318       Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
319       Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
320       Name.startswith("avx512.mask.conflict.") || // Added in 9.0
321       Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
322       Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
323       Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
324       Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
325       Name == "sse.cvtsi2ss" || // Added in 7.0
326       Name == "sse.cvtsi642ss" || // Added in 7.0
327       Name == "sse2.cvtsi2sd" || // Added in 7.0
328       Name == "sse2.cvtsi642sd" || // Added in 7.0
329       Name == "sse2.cvtss2sd" || // Added in 7.0
330       Name == "sse2.cvtdq2pd" || // Added in 3.9
331       Name == "sse2.cvtdq2ps" || // Added in 7.0
332       Name == "sse2.cvtps2pd" || // Added in 3.9
333       Name == "avx.cvtdq2.pd.256" || // Added in 3.9
334       Name == "avx.cvtdq2.ps.256" || // Added in 7.0
335       Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
336       Name.startswith("vcvtph2ps.") || // Added in 11.0
337       Name.startswith("avx.vinsertf128.") || // Added in 3.7
338       Name == "avx2.vinserti128" || // Added in 3.7
339       Name.startswith("avx512.mask.insert") || // Added in 4.0
340       Name.startswith("avx.vextractf128.") || // Added in 3.7
341       Name == "avx2.vextracti128" || // Added in 3.7
342       Name.startswith("avx512.mask.vextract") || // Added in 4.0
343       Name.startswith("sse4a.movnt.") || // Added in 3.9
344       Name.startswith("avx.movnt.") || // Added in 3.2
345       Name.startswith("avx512.storent.") || // Added in 3.9
346       Name == "sse41.movntdqa" || // Added in 5.0
347       Name == "avx2.movntdqa" || // Added in 5.0
348       Name == "avx512.movntdqa" || // Added in 5.0
349       Name == "sse2.storel.dq" || // Added in 3.9
350       Name.startswith("sse.storeu.") || // Added in 3.9
351       Name.startswith("sse2.storeu.") || // Added in 3.9
352       Name.startswith("avx.storeu.") || // Added in 3.9
353       Name.startswith("avx512.mask.storeu.") || // Added in 3.9
354       Name.startswith("avx512.mask.store.p") || // Added in 3.9
355       Name.startswith("avx512.mask.store.b.") || // Added in 3.9
356       Name.startswith("avx512.mask.store.w.") || // Added in 3.9
357       Name.startswith("avx512.mask.store.d.") || // Added in 3.9
358       Name.startswith("avx512.mask.store.q.") || // Added in 3.9
359       Name == "avx512.mask.store.ss" || // Added in 7.0
360       Name.startswith("avx512.mask.loadu.") || // Added in 3.9
361       Name.startswith("avx512.mask.load.") || // Added in 3.9
362       Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
363       Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
364       Name.startswith("avx512.mask.expand.b") || // Added in 9.0
365       Name.startswith("avx512.mask.expand.w") || // Added in 9.0
366       Name.startswith("avx512.mask.expand.d") || // Added in 9.0
367       Name.startswith("avx512.mask.expand.q") || // Added in 9.0
368       Name.startswith("avx512.mask.expand.p") || // Added in 9.0
369       Name.startswith("avx512.mask.compress.b") || // Added in 9.0
370       Name.startswith("avx512.mask.compress.w") || // Added in 9.0
371       Name.startswith("avx512.mask.compress.d") || // Added in 9.0
372       Name.startswith("avx512.mask.compress.q") || // Added in 9.0
373       Name.startswith("avx512.mask.compress.p") || // Added in 9.0
374       Name == "sse42.crc32.64.8" || // Added in 3.4
375       Name.startswith("avx.vbroadcast.s") || // Added in 3.5
376       Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
377       Name.startswith("avx512.mask.palignr.") || // Added in 3.9
378       Name.startswith("avx512.mask.valign.") || // Added in 4.0
379       Name.startswith("sse2.psll.dq") || // Added in 3.7
380       Name.startswith("sse2.psrl.dq") || // Added in 3.7
381       Name.startswith("avx2.psll.dq") || // Added in 3.7
382       Name.startswith("avx2.psrl.dq") || // Added in 3.7
383       Name.startswith("avx512.psll.dq") || // Added in 3.9
384       Name.startswith("avx512.psrl.dq") || // Added in 3.9
385       Name == "sse41.pblendw" || // Added in 3.7
386       Name.startswith("sse41.blendp") || // Added in 3.7
387       Name.startswith("avx.blend.p") || // Added in 3.7
388       Name == "avx2.pblendw" || // Added in 3.7
389       Name.startswith("avx2.pblendd.") || // Added in 3.7
390       Name.startswith("avx.vbroadcastf128") || // Added in 4.0
391       Name == "avx2.vbroadcasti128" || // Added in 3.7
392       Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
393       Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
394       Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
395       Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
396       Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
397       Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
398       Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
399       Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
400       Name == "xop.vpcmov" || // Added in 3.8
401       Name == "xop.vpcmov.256" || // Added in 5.0
402       Name.startswith("avx512.mask.move.s") || // Added in 4.0
403       Name.startswith("avx512.cvtmask2") || // Added in 5.0
404       Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
405       Name.startswith("xop.vprot") || // Added in 8.0
406       Name.startswith("avx512.prol") || // Added in 8.0
407       Name.startswith("avx512.pror") || // Added in 8.0
408       Name.startswith("avx512.mask.prorv.") || // Added in 8.0
409       Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
410       Name.startswith("avx512.mask.prolv.") || // Added in 8.0
411       Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
412       Name.startswith("avx512.ptestm") || //Added in 6.0
413       Name.startswith("avx512.ptestnm") || //Added in 6.0
414       Name.startswith("avx512.mask.pavg")) // Added in 6.0
415     return true;
416 
417   return false;
418 }
419 
420 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
421                                         Function *&NewFn) {
422   // Only handle intrinsics that start with "x86.".
423   if (!Name.startswith("x86."))
424     return false;
425   // Remove "x86." prefix.
426   Name = Name.substr(4);
427 
428   if (ShouldUpgradeX86Intrinsic(F, Name)) {
429     NewFn = nullptr;
430     return true;
431   }
432 
433   if (Name == "rdtscp") { // Added in 8.0
434     // If this intrinsic has 0 operands, it's the new version.
435     if (F->getFunctionType()->getNumParams() == 0)
436       return false;
437 
438     rename(F);
439     NewFn = Intrinsic::getDeclaration(F->getParent(),
440                                       Intrinsic::x86_rdtscp);
441     return true;
442   }
443 
444   // SSE4.1 ptest functions may have an old signature.
445   if (Name.startswith("sse41.ptest")) { // Added in 3.2
446     if (Name.substr(11) == "c")
447       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
448     if (Name.substr(11) == "z")
449       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
450     if (Name.substr(11) == "nzc")
451       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
452   }
453   // Several blend and other instructions with masks used the wrong number of
454   // bits.
455   if (Name == "sse41.insertps") // Added in 3.6
456     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
457                                             NewFn);
458   if (Name == "sse41.dppd") // Added in 3.6
459     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
460                                             NewFn);
461   if (Name == "sse41.dpps") // Added in 3.6
462     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
463                                             NewFn);
464   if (Name == "sse41.mpsadbw") // Added in 3.6
465     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
466                                             NewFn);
467   if (Name == "avx.dp.ps.256") // Added in 3.6
468     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
469                                             NewFn);
470   if (Name == "avx2.mpsadbw") // Added in 3.6
471     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
472                                             NewFn);
473   if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
474     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
475                                      NewFn);
476   if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
477     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
478                                      NewFn);
479   if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
480     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
481                                      NewFn);
482   if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
483     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
484                                      NewFn);
485   if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
486     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
487                                      NewFn);
488   if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
489     return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
490                                      NewFn);
491 
492   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
493   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
494     rename(F);
495     NewFn = Intrinsic::getDeclaration(F->getParent(),
496                                       Intrinsic::x86_xop_vfrcz_ss);
497     return true;
498   }
499   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
500     rename(F);
501     NewFn = Intrinsic::getDeclaration(F->getParent(),
502                                       Intrinsic::x86_xop_vfrcz_sd);
503     return true;
504   }
505   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
506   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
507     auto Idx = F->getFunctionType()->getParamType(2);
508     if (Idx->isFPOrFPVectorTy()) {
509       rename(F);
510       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
511       unsigned EltSize = Idx->getScalarSizeInBits();
512       Intrinsic::ID Permil2ID;
513       if (EltSize == 64 && IdxSize == 128)
514         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
515       else if (EltSize == 32 && IdxSize == 128)
516         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
517       else if (EltSize == 64 && IdxSize == 256)
518         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
519       else
520         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
521       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
522       return true;
523     }
524   }
525 
526   if (Name == "seh.recoverfp") {
527     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
528     return true;
529   }
530 
531   return false;
532 }
533 
534 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
535   assert(F && "Illegal to upgrade a non-existent Function.");
536 
537   // Quickly eliminate it, if it's not a candidate.
538   StringRef Name = F->getName();
539   if (Name.size() <= 8 || !Name.startswith("llvm."))
540     return false;
541   Name = Name.substr(5); // Strip off "llvm."
542 
543   switch (Name[0]) {
544   default: break;
545   case 'a': {
546     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
547       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
548                                         F->arg_begin()->getType());
549       return true;
550     }
551     if (Name.startswith("aarch64.neon.frintn")) {
552       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
553                                         F->arg_begin()->getType());
554       return true;
555     }
556     if (Name.startswith("aarch64.neon.rbit")) {
557       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
558                                         F->arg_begin()->getType());
559       return true;
560     }
561     if (Name.startswith("arm.neon.vclz")) {
562       Type* args[2] = {
563         F->arg_begin()->getType(),
564         Type::getInt1Ty(F->getContext())
565       };
566       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
567       // the end of the name. Change name from llvm.arm.neon.vclz.* to
568       //  llvm.ctlz.*
569       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
570       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
571                                "llvm.ctlz." + Name.substr(14), F->getParent());
572       return true;
573     }
574     if (Name.startswith("arm.neon.vcnt")) {
575       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
576                                         F->arg_begin()->getType());
577       return true;
578     }
579     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
580     if (vstRegex.match(Name)) {
581       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
582                                                 Intrinsic::arm_neon_vst2,
583                                                 Intrinsic::arm_neon_vst3,
584                                                 Intrinsic::arm_neon_vst4};
585 
586       static const Intrinsic::ID StoreLaneInts[] = {
587         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
588         Intrinsic::arm_neon_vst4lane
589       };
590 
591       auto fArgs = F->getFunctionType()->params();
592       Type *Tys[] = {fArgs[0], fArgs[1]};
593       if (!Name.contains("lane"))
594         NewFn = Intrinsic::getDeclaration(F->getParent(),
595                                           StoreInts[fArgs.size() - 3], Tys);
596       else
597         NewFn = Intrinsic::getDeclaration(F->getParent(),
598                                           StoreLaneInts[fArgs.size() - 5], Tys);
599       return true;
600     }
601     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
602       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
603       return true;
604     }
605     if (Name.startswith("arm.neon.vqadds.")) {
606       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
607                                         F->arg_begin()->getType());
608       return true;
609     }
610     if (Name.startswith("arm.neon.vqaddu.")) {
611       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
612                                         F->arg_begin()->getType());
613       return true;
614     }
615     if (Name.startswith("arm.neon.vqsubs.")) {
616       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
617                                         F->arg_begin()->getType());
618       return true;
619     }
620     if (Name.startswith("arm.neon.vqsubu.")) {
621       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
622                                         F->arg_begin()->getType());
623       return true;
624     }
625     if (Name.startswith("aarch64.neon.addp")) {
626       if (F->arg_size() != 2)
627         break; // Invalid IR.
628       VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
629       if (Ty && Ty->getElementType()->isFloatingPointTy()) {
630         NewFn = Intrinsic::getDeclaration(F->getParent(),
631                                           Intrinsic::aarch64_neon_faddp, Ty);
632         return true;
633       }
634     }
635 
636     // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
637     // respectively
638     if ((Name.startswith("arm.neon.bfdot.") ||
639          Name.startswith("aarch64.neon.bfdot.")) &&
640         Name.endswith("i8")) {
641       Intrinsic::ID IID =
642           StringSwitch<Intrinsic::ID>(Name)
643               .Cases("arm.neon.bfdot.v2f32.v8i8",
644                      "arm.neon.bfdot.v4f32.v16i8",
645                      Intrinsic::arm_neon_bfdot)
646               .Cases("aarch64.neon.bfdot.v2f32.v8i8",
647                      "aarch64.neon.bfdot.v4f32.v16i8",
648                      Intrinsic::aarch64_neon_bfdot)
649               .Default(Intrinsic::not_intrinsic);
650       if (IID == Intrinsic::not_intrinsic)
651         break;
652 
653       size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
654       assert((OperandWidth == 64 || OperandWidth == 128) &&
655              "Unexpected operand width");
656       LLVMContext &Ctx = F->getParent()->getContext();
657       std::array<Type *, 2> Tys {{
658         F->getReturnType(),
659         FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
660       }};
661       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
662       return true;
663     }
664 
665     // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
666     // and accept v8bf16 instead of v16i8
667     if ((Name.startswith("arm.neon.bfm") ||
668          Name.startswith("aarch64.neon.bfm")) &&
669         Name.endswith(".v4f32.v16i8")) {
670       Intrinsic::ID IID =
671           StringSwitch<Intrinsic::ID>(Name)
672               .Case("arm.neon.bfmmla.v4f32.v16i8",
673                     Intrinsic::arm_neon_bfmmla)
674               .Case("arm.neon.bfmlalb.v4f32.v16i8",
675                     Intrinsic::arm_neon_bfmlalb)
676               .Case("arm.neon.bfmlalt.v4f32.v16i8",
677                     Intrinsic::arm_neon_bfmlalt)
678               .Case("aarch64.neon.bfmmla.v4f32.v16i8",
679                     Intrinsic::aarch64_neon_bfmmla)
680               .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
681                     Intrinsic::aarch64_neon_bfmlalb)
682               .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
683                     Intrinsic::aarch64_neon_bfmlalt)
684               .Default(Intrinsic::not_intrinsic);
685       if (IID == Intrinsic::not_intrinsic)
686         break;
687 
688       std::array<Type *, 0> Tys;
689       NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
690       return true;
691     }
692 
693     if (Name == "arm.mve.vctp64" &&
694         cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
695       // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
696       // function and deal with it below in UpgradeIntrinsicCall.
697       rename(F);
698       return true;
699     }
700     // These too are changed to accept a v2i1 insteead of the old v4i1.
701     if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
702         Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
703         Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
704         Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
705         Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
706         Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
707         Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
708         Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
709         Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
710         Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
711         Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
712         Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
713         Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
714         Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
715       return true;
716 
717     if (Name == "amdgcn.alignbit") {
718       // Target specific intrinsic became redundant
719       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
720                                         {F->getReturnType()});
721       return true;
722     }
723 
724     break;
725   }
726 
727   case 'c': {
728     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
729       rename(F);
730       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
731                                         F->arg_begin()->getType());
732       return true;
733     }
734     if (Name.startswith("cttz.") && F->arg_size() == 1) {
735       rename(F);
736       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
737                                         F->arg_begin()->getType());
738       return true;
739     }
740     break;
741   }
742   case 'd': {
743     if (Name == "dbg.value" && F->arg_size() == 4) {
744       rename(F);
745       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
746       return true;
747     }
748     break;
749   }
750   case 'e': {
751     SmallVector<StringRef, 2> Groups;
752     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
753     if (R.match(Name, &Groups)) {
754       Intrinsic::ID ID;
755       ID = StringSwitch<Intrinsic::ID>(Groups[1])
756                .Case("add", Intrinsic::vector_reduce_add)
757                .Case("mul", Intrinsic::vector_reduce_mul)
758                .Case("and", Intrinsic::vector_reduce_and)
759                .Case("or", Intrinsic::vector_reduce_or)
760                .Case("xor", Intrinsic::vector_reduce_xor)
761                .Case("smax", Intrinsic::vector_reduce_smax)
762                .Case("smin", Intrinsic::vector_reduce_smin)
763                .Case("umax", Intrinsic::vector_reduce_umax)
764                .Case("umin", Intrinsic::vector_reduce_umin)
765                .Case("fmax", Intrinsic::vector_reduce_fmax)
766                .Case("fmin", Intrinsic::vector_reduce_fmin)
767                .Default(Intrinsic::not_intrinsic);
768       if (ID != Intrinsic::not_intrinsic) {
769         rename(F);
770         auto Args = F->getFunctionType()->params();
771         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
772         return true;
773       }
774     }
775     static const Regex R2(
776         "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
777     Groups.clear();
778     if (R2.match(Name, &Groups)) {
779       Intrinsic::ID ID = Intrinsic::not_intrinsic;
780       if (Groups[1] == "fadd")
781         ID = Intrinsic::vector_reduce_fadd;
782       if (Groups[1] == "fmul")
783         ID = Intrinsic::vector_reduce_fmul;
784       if (ID != Intrinsic::not_intrinsic) {
785         rename(F);
786         auto Args = F->getFunctionType()->params();
787         Type *Tys[] = {Args[1]};
788         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
789         return true;
790       }
791     }
792     break;
793   }
794   case 'i':
795   case 'l': {
796     bool IsLifetimeStart = Name.startswith("lifetime.start");
797     if (IsLifetimeStart || Name.startswith("invariant.start")) {
798       Intrinsic::ID ID = IsLifetimeStart ?
799         Intrinsic::lifetime_start : Intrinsic::invariant_start;
800       auto Args = F->getFunctionType()->params();
801       Type* ObjectPtr[1] = {Args[1]};
802       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
803         rename(F);
804         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
805         return true;
806       }
807     }
808 
809     bool IsLifetimeEnd = Name.startswith("lifetime.end");
810     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
811       Intrinsic::ID ID = IsLifetimeEnd ?
812         Intrinsic::lifetime_end : Intrinsic::invariant_end;
813 
814       auto Args = F->getFunctionType()->params();
815       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
816       if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
817         rename(F);
818         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
819         return true;
820       }
821     }
822     if (Name.startswith("invariant.group.barrier")) {
823       // Rename invariant.group.barrier to launder.invariant.group
824       auto Args = F->getFunctionType()->params();
825       Type* ObjectPtr[1] = {Args[0]};
826       rename(F);
827       NewFn = Intrinsic::getDeclaration(F->getParent(),
828           Intrinsic::launder_invariant_group, ObjectPtr);
829       return true;
830 
831     }
832 
833     break;
834   }
835   case 'm': {
836     if (Name.startswith("masked.load.")) {
837       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
838       if (F->getName() !=
839           Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
840         rename(F);
841         NewFn = Intrinsic::getDeclaration(F->getParent(),
842                                           Intrinsic::masked_load,
843                                           Tys);
844         return true;
845       }
846     }
847     if (Name.startswith("masked.store.")) {
848       auto Args = F->getFunctionType()->params();
849       Type *Tys[] = { Args[0], Args[1] };
850       if (F->getName() !=
851           Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
852         rename(F);
853         NewFn = Intrinsic::getDeclaration(F->getParent(),
854                                           Intrinsic::masked_store,
855                                           Tys);
856         return true;
857       }
858     }
859     // Renaming gather/scatter intrinsics with no address space overloading
860     // to the new overload which includes an address space
861     if (Name.startswith("masked.gather.")) {
862       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
863       if (F->getName() !=
864           Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
865         rename(F);
866         NewFn = Intrinsic::getDeclaration(F->getParent(),
867                                           Intrinsic::masked_gather, Tys);
868         return true;
869       }
870     }
871     if (Name.startswith("masked.scatter.")) {
872       auto Args = F->getFunctionType()->params();
873       Type *Tys[] = {Args[0], Args[1]};
874       if (F->getName() !=
875           Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
876         rename(F);
877         NewFn = Intrinsic::getDeclaration(F->getParent(),
878                                           Intrinsic::masked_scatter, Tys);
879         return true;
880       }
881     }
882     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
883     // alignment parameter to embedding the alignment as an attribute of
884     // the pointer args.
885     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
886       rename(F);
887       // Get the types of dest, src, and len
888       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
889       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
890                                         ParamTypes);
891       return true;
892     }
893     if (Name.startswith("memmove.") && F->arg_size() == 5) {
894       rename(F);
895       // Get the types of dest, src, and len
896       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
897       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
898                                         ParamTypes);
899       return true;
900     }
901     if (Name.startswith("memset.") && F->arg_size() == 5) {
902       rename(F);
903       // Get the types of dest, and len
904       const auto *FT = F->getFunctionType();
905       Type *ParamTypes[2] = {
906           FT->getParamType(0), // Dest
907           FT->getParamType(2)  // len
908       };
909       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
910                                         ParamTypes);
911       return true;
912     }
913     break;
914   }
915   case 'n': {
916     if (Name.startswith("nvvm.")) {
917       Name = Name.substr(5);
918 
919       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
920       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
921                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
922                               .Case("clz.i", Intrinsic::ctlz)
923                               .Case("popc.i", Intrinsic::ctpop)
924                               .Default(Intrinsic::not_intrinsic);
925       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
926         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
927                                           {F->getReturnType()});
928         return true;
929       }
930 
931       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
932       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
933       //
934       // TODO: We could add lohi.i2d.
935       bool Expand = StringSwitch<bool>(Name)
936                         .Cases("abs.i", "abs.ll", true)
937                         .Cases("clz.ll", "popc.ll", "h2f", true)
938                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
939                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
940                         .StartsWith("atomic.load.add.f32.p", true)
941                         .StartsWith("atomic.load.add.f64.p", true)
942                         .Default(false);
943       if (Expand) {
944         NewFn = nullptr;
945         return true;
946       }
947     }
948     break;
949   }
950   case 'o':
951     // We only need to change the name to match the mangling including the
952     // address space.
953     if (Name.startswith("objectsize.")) {
954       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
955       if (F->arg_size() == 2 || F->arg_size() == 3 ||
956           F->getName() !=
957               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
958         rename(F);
959         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
960                                           Tys);
961         return true;
962       }
963     }
964     break;
965 
966   case 'p':
967     if (Name == "prefetch") {
968       // Handle address space overloading.
969       Type *Tys[] = {F->arg_begin()->getType()};
970       if (F->getName() !=
971           Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
972         rename(F);
973         NewFn =
974             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
975         return true;
976       }
977     } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
978       rename(F);
979       NewFn = Intrinsic::getDeclaration(F->getParent(),
980                                         Intrinsic::ptr_annotation,
981                                         F->arg_begin()->getType());
982       return true;
983     }
984     break;
985 
986   case 's':
987     if (Name == "stackprotectorcheck") {
988       NewFn = nullptr;
989       return true;
990     }
991     break;
992 
993   case 'v': {
994     if (Name == "var.annotation" && F->arg_size() == 4) {
995       rename(F);
996       NewFn = Intrinsic::getDeclaration(F->getParent(),
997                                         Intrinsic::var_annotation);
998       return true;
999     }
1000     break;
1001   }
1002 
1003   case 'x':
1004     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
1005       return true;
1006   }
1007 
1008   auto *ST = dyn_cast<StructType>(F->getReturnType());
1009   if (ST && (!ST->isLiteral() || ST->isPacked())) {
1010     // Replace return type with literal non-packed struct. Only do this for
1011     // intrinsics declared to return a struct, not for intrinsics with
1012     // overloaded return type, in which case the exact struct type will be
1013     // mangled into the name.
1014     SmallVector<Intrinsic::IITDescriptor> Desc;
1015     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1016     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1017       auto *FT = F->getFunctionType();
1018       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1019       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1020       std::string Name = F->getName().str();
1021       rename(F);
1022       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1023                                Name, F->getParent());
1024 
1025       // The new function may also need remangling.
1026       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
1027         NewFn = *Result;
1028       return true;
1029     }
1030   }
1031 
1032   // Remangle our intrinsic since we upgrade the mangling
1033   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1034   if (Result != None) {
1035     NewFn = Result.getValue();
1036     return true;
1037   }
1038 
1039   //  This may not belong here. This function is effectively being overloaded
1040   //  to both detect an intrinsic which needs upgrading, and to provide the
1041   //  upgraded form of the intrinsic. We should perhaps have two separate
1042   //  functions for this.
1043   return false;
1044 }
1045 
1046 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
1047   NewFn = nullptr;
1048   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
1049   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1050 
1051   // Upgrade intrinsic attributes.  This does not change the function.
1052   if (NewFn)
1053     F = NewFn;
1054   if (Intrinsic::ID id = F->getIntrinsicID())
1055     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1056   return Upgraded;
1057 }
1058 
1059 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1060   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1061                           GV->getName() == "llvm.global_dtors")) ||
1062       !GV->hasInitializer())
1063     return nullptr;
1064   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1065   if (!ATy)
1066     return nullptr;
1067   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1068   if (!STy || STy->getNumElements() != 2)
1069     return nullptr;
1070 
1071   LLVMContext &C = GV->getContext();
1072   IRBuilder<> IRB(C);
1073   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1074                                IRB.getInt8PtrTy());
1075   Constant *Init = GV->getInitializer();
1076   unsigned N = Init->getNumOperands();
1077   std::vector<Constant *> NewCtors(N);
1078   for (unsigned i = 0; i != N; ++i) {
1079     auto Ctor = cast<Constant>(Init->getOperand(i));
1080     NewCtors[i] = ConstantStruct::get(
1081         EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
1082         Constant::getNullValue(IRB.getInt8PtrTy()));
1083   }
1084   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1085 
1086   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1087                             NewInit, GV->getName());
1088 }
1089 
1090 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1091 // to byte shuffles.
1092 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
1093                                          Value *Op, unsigned Shift) {
1094   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1095   unsigned NumElts = ResultTy->getNumElements() * 8;
1096 
1097   // Bitcast from a 64-bit element type to a byte element type.
1098   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1099   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1100 
1101   // We'll be shuffling in zeroes.
1102   Value *Res = Constant::getNullValue(VecTy);
1103 
1104   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1105   // we'll just return the zero vector.
1106   if (Shift < 16) {
1107     int Idxs[64];
1108     // 256/512-bit version is split into 2/4 16-byte lanes.
1109     for (unsigned l = 0; l != NumElts; l += 16)
1110       for (unsigned i = 0; i != 16; ++i) {
1111         unsigned Idx = NumElts + i - Shift;
1112         if (Idx < NumElts)
1113           Idx -= NumElts - 16; // end of lane, switch operand.
1114         Idxs[l + i] = Idx + l;
1115       }
1116 
1117     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
1118   }
1119 
1120   // Bitcast back to a 64-bit element type.
1121   return Builder.CreateBitCast(Res, ResultTy, "cast");
1122 }
1123 
1124 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1125 // to byte shuffles.
1126 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1127                                          unsigned Shift) {
1128   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1129   unsigned NumElts = ResultTy->getNumElements() * 8;
1130 
1131   // Bitcast from a 64-bit element type to a byte element type.
1132   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1133   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1134 
1135   // We'll be shuffling in zeroes.
1136   Value *Res = Constant::getNullValue(VecTy);
1137 
1138   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1139   // we'll just return the zero vector.
1140   if (Shift < 16) {
1141     int Idxs[64];
1142     // 256/512-bit version is split into 2/4 16-byte lanes.
1143     for (unsigned l = 0; l != NumElts; l += 16)
1144       for (unsigned i = 0; i != 16; ++i) {
1145         unsigned Idx = i + Shift;
1146         if (Idx >= 16)
1147           Idx += NumElts - 16; // end of lane, switch operand.
1148         Idxs[l + i] = Idx + l;
1149       }
1150 
1151     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
1152   }
1153 
1154   // Bitcast back to a 64-bit element type.
1155   return Builder.CreateBitCast(Res, ResultTy, "cast");
1156 }
1157 
1158 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1159                             unsigned NumElts) {
1160   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1161   llvm::VectorType *MaskTy = FixedVectorType::get(
1162       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1163   Mask = Builder.CreateBitCast(Mask, MaskTy);
1164 
1165   // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1166   // i8 and we need to extract down to the right number of elements.
1167   if (NumElts <= 4) {
1168     int Indices[4];
1169     for (unsigned i = 0; i != NumElts; ++i)
1170       Indices[i] = i;
1171     Mask = Builder.CreateShuffleVector(
1172         Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
1173   }
1174 
1175   return Mask;
1176 }
1177 
1178 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
1179                             Value *Op0, Value *Op1) {
1180   // If the mask is all ones just emit the first operation.
1181   if (const auto *C = dyn_cast<Constant>(Mask))
1182     if (C->isAllOnesValue())
1183       return Op0;
1184 
1185   Mask = getX86MaskVec(Builder, Mask,
1186                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1187   return Builder.CreateSelect(Mask, Op0, Op1);
1188 }
1189 
1190 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
1191                                   Value *Op0, Value *Op1) {
1192   // If the mask is all ones just emit the first operation.
1193   if (const auto *C = dyn_cast<Constant>(Mask))
1194     if (C->isAllOnesValue())
1195       return Op0;
1196 
1197   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1198                                       Mask->getType()->getIntegerBitWidth());
1199   Mask = Builder.CreateBitCast(Mask, MaskTy);
1200   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1201   return Builder.CreateSelect(Mask, Op0, Op1);
1202 }
1203 
1204 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1205 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1206 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1207 static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1208                                         Value *Op1, Value *Shift,
1209                                         Value *Passthru, Value *Mask,
1210                                         bool IsVALIGN) {
1211   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1212 
1213   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1214   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1215   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1216   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1217 
1218   // Mask the immediate for VALIGN.
1219   if (IsVALIGN)
1220     ShiftVal &= (NumElts - 1);
1221 
1222   // If palignr is shifting the pair of vectors more than the size of two
1223   // lanes, emit zero.
1224   if (ShiftVal >= 32)
1225     return llvm::Constant::getNullValue(Op0->getType());
1226 
1227   // If palignr is shifting the pair of input vectors more than one lane,
1228   // but less than two lanes, convert to shifting in zeroes.
1229   if (ShiftVal > 16) {
1230     ShiftVal -= 16;
1231     Op1 = Op0;
1232     Op0 = llvm::Constant::getNullValue(Op0->getType());
1233   }
1234 
1235   int Indices[64];
1236   // 256-bit palignr operates on 128-bit lanes so we need to handle that
1237   for (unsigned l = 0; l < NumElts; l += 16) {
1238     for (unsigned i = 0; i != 16; ++i) {
1239       unsigned Idx = ShiftVal + i;
1240       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1241         Idx += NumElts - 16; // End of lane, switch operand.
1242       Indices[l + i] = Idx + l;
1243     }
1244   }
1245 
1246   Value *Align = Builder.CreateShuffleVector(Op1, Op0,
1247                                              makeArrayRef(Indices, NumElts),
1248                                              "palignr");
1249 
1250   return EmitX86Select(Builder, Mask, Align, Passthru);
1251 }
1252 
1253 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1254                                           bool ZeroMask, bool IndexForm) {
1255   Type *Ty = CI.getType();
1256   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1257   unsigned EltWidth = Ty->getScalarSizeInBits();
1258   bool IsFloat = Ty->isFPOrFPVectorTy();
1259   Intrinsic::ID IID;
1260   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1261     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1262   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1263     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1264   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1265     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1266   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1267     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1268   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1269     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1270   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1271     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1272   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1273     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1274   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1275     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1276   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1277     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1278   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1279     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1280   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1281     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1282   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1283     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1284   else if (VecWidth == 128 && EltWidth == 16)
1285     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1286   else if (VecWidth == 256 && EltWidth == 16)
1287     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1288   else if (VecWidth == 512 && EltWidth == 16)
1289     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1290   else if (VecWidth == 128 && EltWidth == 8)
1291     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1292   else if (VecWidth == 256 && EltWidth == 8)
1293     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1294   else if (VecWidth == 512 && EltWidth == 8)
1295     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1296   else
1297     llvm_unreachable("Unexpected intrinsic");
1298 
1299   Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1300                     CI.getArgOperand(2) };
1301 
1302   // If this isn't index form we need to swap operand 0 and 1.
1303   if (!IndexForm)
1304     std::swap(Args[0], Args[1]);
1305 
1306   Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1307                                 Args);
1308   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1309                              : Builder.CreateBitCast(CI.getArgOperand(1),
1310                                                      Ty);
1311   return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1312 }
1313 
1314 static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1315                                          Intrinsic::ID IID) {
1316   Type *Ty = CI.getType();
1317   Value *Op0 = CI.getOperand(0);
1318   Value *Op1 = CI.getOperand(1);
1319   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1320   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1321 
1322   if (CI.arg_size() == 4) { // For masked intrinsics.
1323     Value *VecSrc = CI.getOperand(2);
1324     Value *Mask = CI.getOperand(3);
1325     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1326   }
1327   return Res;
1328 }
1329 
1330 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1331                                bool IsRotateRight) {
1332   Type *Ty = CI.getType();
1333   Value *Src = CI.getArgOperand(0);
1334   Value *Amt = CI.getArgOperand(1);
1335 
1336   // Amount may be scalar immediate, in which case create a splat vector.
1337   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1338   // we only care about the lowest log2 bits anyway.
1339   if (Amt->getType() != Ty) {
1340     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1341     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1342     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1343   }
1344 
1345   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1346   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1347   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1348 
1349   if (CI.arg_size() == 4) { // For masked intrinsics.
1350     Value *VecSrc = CI.getOperand(2);
1351     Value *Mask = CI.getOperand(3);
1352     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1353   }
1354   return Res;
1355 }
1356 
1357 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1358                               bool IsSigned) {
1359   Type *Ty = CI.getType();
1360   Value *LHS = CI.getArgOperand(0);
1361   Value *RHS = CI.getArgOperand(1);
1362 
1363   CmpInst::Predicate Pred;
1364   switch (Imm) {
1365   case 0x0:
1366     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1367     break;
1368   case 0x1:
1369     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1370     break;
1371   case 0x2:
1372     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1373     break;
1374   case 0x3:
1375     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1376     break;
1377   case 0x4:
1378     Pred = ICmpInst::ICMP_EQ;
1379     break;
1380   case 0x5:
1381     Pred = ICmpInst::ICMP_NE;
1382     break;
1383   case 0x6:
1384     return Constant::getNullValue(Ty); // FALSE
1385   case 0x7:
1386     return Constant::getAllOnesValue(Ty); // TRUE
1387   default:
1388     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1389   }
1390 
1391   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1392   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1393   return Ext;
1394 }
1395 
1396 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1397                                     bool IsShiftRight, bool ZeroMask) {
1398   Type *Ty = CI.getType();
1399   Value *Op0 = CI.getArgOperand(0);
1400   Value *Op1 = CI.getArgOperand(1);
1401   Value *Amt = CI.getArgOperand(2);
1402 
1403   if (IsShiftRight)
1404     std::swap(Op0, Op1);
1405 
1406   // Amount may be scalar immediate, in which case create a splat vector.
1407   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1408   // we only care about the lowest log2 bits anyway.
1409   if (Amt->getType() != Ty) {
1410     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1411     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1412     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1413   }
1414 
1415   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1416   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1417   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1418 
1419   unsigned NumArgs = CI.arg_size();
1420   if (NumArgs >= 4) { // For masked intrinsics.
1421     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1422                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1423                                    CI.getArgOperand(0);
1424     Value *Mask = CI.getOperand(NumArgs - 1);
1425     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1426   }
1427   return Res;
1428 }
1429 
1430 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1431                                  Value *Ptr, Value *Data, Value *Mask,
1432                                  bool Aligned) {
1433   // Cast the pointer to the right type.
1434   Ptr = Builder.CreateBitCast(Ptr,
1435                               llvm::PointerType::getUnqual(Data->getType()));
1436   const Align Alignment =
1437       Aligned
1438           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
1439           : Align(1);
1440 
1441   // If the mask is all ones just emit a regular store.
1442   if (const auto *C = dyn_cast<Constant>(Mask))
1443     if (C->isAllOnesValue())
1444       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1445 
1446   // Convert the mask from an integer type to a vector of i1.
1447   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1448   Mask = getX86MaskVec(Builder, Mask, NumElts);
1449   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1450 }
1451 
1452 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1453                                 Value *Ptr, Value *Passthru, Value *Mask,
1454                                 bool Aligned) {
1455   Type *ValTy = Passthru->getType();
1456   // Cast the pointer to the right type.
1457   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1458   const Align Alignment =
1459       Aligned
1460           ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
1461                   8)
1462           : Align(1);
1463 
1464   // If the mask is all ones just emit a regular store.
1465   if (const auto *C = dyn_cast<Constant>(Mask))
1466     if (C->isAllOnesValue())
1467       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1468 
1469   // Convert the mask from an integer type to a vector of i1.
1470   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1471   Mask = getX86MaskVec(Builder, Mask, NumElts);
1472   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1473 }
1474 
1475 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1476   Type *Ty = CI.getType();
1477   Value *Op0 = CI.getArgOperand(0);
1478   Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1479   Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1480   if (CI.arg_size() == 3)
1481     Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1482   return Res;
1483 }
1484 
1485 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1486   Type *Ty = CI.getType();
1487 
1488   // Arguments have a vXi32 type so cast to vXi64.
1489   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1490   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1491 
1492   if (IsSigned) {
1493     // Shift left then arithmetic shift right.
1494     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1495     LHS = Builder.CreateShl(LHS, ShiftAmt);
1496     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1497     RHS = Builder.CreateShl(RHS, ShiftAmt);
1498     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1499   } else {
1500     // Clear the upper bits.
1501     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1502     LHS = Builder.CreateAnd(LHS, Mask);
1503     RHS = Builder.CreateAnd(RHS, Mask);
1504   }
1505 
1506   Value *Res = Builder.CreateMul(LHS, RHS);
1507 
1508   if (CI.arg_size() == 4)
1509     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1510 
1511   return Res;
1512 }
1513 
1514 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1515 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1516                                      Value *Mask) {
1517   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1518   if (Mask) {
1519     const auto *C = dyn_cast<Constant>(Mask);
1520     if (!C || !C->isAllOnesValue())
1521       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1522   }
1523 
1524   if (NumElts < 8) {
1525     int Indices[8];
1526     for (unsigned i = 0; i != NumElts; ++i)
1527       Indices[i] = i;
1528     for (unsigned i = NumElts; i != 8; ++i)
1529       Indices[i] = NumElts + i % NumElts;
1530     Vec = Builder.CreateShuffleVector(Vec,
1531                                       Constant::getNullValue(Vec->getType()),
1532                                       Indices);
1533   }
1534   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1535 }
1536 
1537 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1538                                    unsigned CC, bool Signed) {
1539   Value *Op0 = CI.getArgOperand(0);
1540   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1541 
1542   Value *Cmp;
1543   if (CC == 3) {
1544     Cmp = Constant::getNullValue(
1545         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1546   } else if (CC == 7) {
1547     Cmp = Constant::getAllOnesValue(
1548         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1549   } else {
1550     ICmpInst::Predicate Pred;
1551     switch (CC) {
1552     default: llvm_unreachable("Unknown condition code");
1553     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1554     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1555     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1556     case 4: Pred = ICmpInst::ICMP_NE;  break;
1557     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1558     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1559     }
1560     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1561   }
1562 
1563   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1564 
1565   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1566 }
1567 
1568 // Replace a masked intrinsic with an older unmasked intrinsic.
1569 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1570                                     Intrinsic::ID IID) {
1571   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1572   Value *Rep = Builder.CreateCall(Intrin,
1573                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1574   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1575 }
1576 
1577 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1578   Value* A = CI.getArgOperand(0);
1579   Value* B = CI.getArgOperand(1);
1580   Value* Src = CI.getArgOperand(2);
1581   Value* Mask = CI.getArgOperand(3);
1582 
1583   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1584   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1585   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1586   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1587   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1588   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1589 }
1590 
1591 
1592 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1593   Value* Op = CI.getArgOperand(0);
1594   Type* ReturnOp = CI.getType();
1595   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1596   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1597   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1598 }
1599 
1600 // Replace intrinsic with unmasked version and a select.
1601 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1602                                       CallBase &CI, Value *&Rep) {
1603   Name = Name.substr(12); // Remove avx512.mask.
1604 
1605   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1606   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1607   Intrinsic::ID IID;
1608   if (Name.startswith("max.p")) {
1609     if (VecWidth == 128 && EltWidth == 32)
1610       IID = Intrinsic::x86_sse_max_ps;
1611     else if (VecWidth == 128 && EltWidth == 64)
1612       IID = Intrinsic::x86_sse2_max_pd;
1613     else if (VecWidth == 256 && EltWidth == 32)
1614       IID = Intrinsic::x86_avx_max_ps_256;
1615     else if (VecWidth == 256 && EltWidth == 64)
1616       IID = Intrinsic::x86_avx_max_pd_256;
1617     else
1618       llvm_unreachable("Unexpected intrinsic");
1619   } else if (Name.startswith("min.p")) {
1620     if (VecWidth == 128 && EltWidth == 32)
1621       IID = Intrinsic::x86_sse_min_ps;
1622     else if (VecWidth == 128 && EltWidth == 64)
1623       IID = Intrinsic::x86_sse2_min_pd;
1624     else if (VecWidth == 256 && EltWidth == 32)
1625       IID = Intrinsic::x86_avx_min_ps_256;
1626     else if (VecWidth == 256 && EltWidth == 64)
1627       IID = Intrinsic::x86_avx_min_pd_256;
1628     else
1629       llvm_unreachable("Unexpected intrinsic");
1630   } else if (Name.startswith("pshuf.b.")) {
1631     if (VecWidth == 128)
1632       IID = Intrinsic::x86_ssse3_pshuf_b_128;
1633     else if (VecWidth == 256)
1634       IID = Intrinsic::x86_avx2_pshuf_b;
1635     else if (VecWidth == 512)
1636       IID = Intrinsic::x86_avx512_pshuf_b_512;
1637     else
1638       llvm_unreachable("Unexpected intrinsic");
1639   } else if (Name.startswith("pmul.hr.sw.")) {
1640     if (VecWidth == 128)
1641       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1642     else if (VecWidth == 256)
1643       IID = Intrinsic::x86_avx2_pmul_hr_sw;
1644     else if (VecWidth == 512)
1645       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1646     else
1647       llvm_unreachable("Unexpected intrinsic");
1648   } else if (Name.startswith("pmulh.w.")) {
1649     if (VecWidth == 128)
1650       IID = Intrinsic::x86_sse2_pmulh_w;
1651     else if (VecWidth == 256)
1652       IID = Intrinsic::x86_avx2_pmulh_w;
1653     else if (VecWidth == 512)
1654       IID = Intrinsic::x86_avx512_pmulh_w_512;
1655     else
1656       llvm_unreachable("Unexpected intrinsic");
1657   } else if (Name.startswith("pmulhu.w.")) {
1658     if (VecWidth == 128)
1659       IID = Intrinsic::x86_sse2_pmulhu_w;
1660     else if (VecWidth == 256)
1661       IID = Intrinsic::x86_avx2_pmulhu_w;
1662     else if (VecWidth == 512)
1663       IID = Intrinsic::x86_avx512_pmulhu_w_512;
1664     else
1665       llvm_unreachable("Unexpected intrinsic");
1666   } else if (Name.startswith("pmaddw.d.")) {
1667     if (VecWidth == 128)
1668       IID = Intrinsic::x86_sse2_pmadd_wd;
1669     else if (VecWidth == 256)
1670       IID = Intrinsic::x86_avx2_pmadd_wd;
1671     else if (VecWidth == 512)
1672       IID = Intrinsic::x86_avx512_pmaddw_d_512;
1673     else
1674       llvm_unreachable("Unexpected intrinsic");
1675   } else if (Name.startswith("pmaddubs.w.")) {
1676     if (VecWidth == 128)
1677       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1678     else if (VecWidth == 256)
1679       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1680     else if (VecWidth == 512)
1681       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1682     else
1683       llvm_unreachable("Unexpected intrinsic");
1684   } else if (Name.startswith("packsswb.")) {
1685     if (VecWidth == 128)
1686       IID = Intrinsic::x86_sse2_packsswb_128;
1687     else if (VecWidth == 256)
1688       IID = Intrinsic::x86_avx2_packsswb;
1689     else if (VecWidth == 512)
1690       IID = Intrinsic::x86_avx512_packsswb_512;
1691     else
1692       llvm_unreachable("Unexpected intrinsic");
1693   } else if (Name.startswith("packssdw.")) {
1694     if (VecWidth == 128)
1695       IID = Intrinsic::x86_sse2_packssdw_128;
1696     else if (VecWidth == 256)
1697       IID = Intrinsic::x86_avx2_packssdw;
1698     else if (VecWidth == 512)
1699       IID = Intrinsic::x86_avx512_packssdw_512;
1700     else
1701       llvm_unreachable("Unexpected intrinsic");
1702   } else if (Name.startswith("packuswb.")) {
1703     if (VecWidth == 128)
1704       IID = Intrinsic::x86_sse2_packuswb_128;
1705     else if (VecWidth == 256)
1706       IID = Intrinsic::x86_avx2_packuswb;
1707     else if (VecWidth == 512)
1708       IID = Intrinsic::x86_avx512_packuswb_512;
1709     else
1710       llvm_unreachable("Unexpected intrinsic");
1711   } else if (Name.startswith("packusdw.")) {
1712     if (VecWidth == 128)
1713       IID = Intrinsic::x86_sse41_packusdw;
1714     else if (VecWidth == 256)
1715       IID = Intrinsic::x86_avx2_packusdw;
1716     else if (VecWidth == 512)
1717       IID = Intrinsic::x86_avx512_packusdw_512;
1718     else
1719       llvm_unreachable("Unexpected intrinsic");
1720   } else if (Name.startswith("vpermilvar.")) {
1721     if (VecWidth == 128 && EltWidth == 32)
1722       IID = Intrinsic::x86_avx_vpermilvar_ps;
1723     else if (VecWidth == 128 && EltWidth == 64)
1724       IID = Intrinsic::x86_avx_vpermilvar_pd;
1725     else if (VecWidth == 256 && EltWidth == 32)
1726       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1727     else if (VecWidth == 256 && EltWidth == 64)
1728       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1729     else if (VecWidth == 512 && EltWidth == 32)
1730       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1731     else if (VecWidth == 512 && EltWidth == 64)
1732       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1733     else
1734       llvm_unreachable("Unexpected intrinsic");
1735   } else if (Name == "cvtpd2dq.256") {
1736     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1737   } else if (Name == "cvtpd2ps.256") {
1738     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1739   } else if (Name == "cvttpd2dq.256") {
1740     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1741   } else if (Name == "cvttps2dq.128") {
1742     IID = Intrinsic::x86_sse2_cvttps2dq;
1743   } else if (Name == "cvttps2dq.256") {
1744     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1745   } else if (Name.startswith("permvar.")) {
1746     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1747     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1748       IID = Intrinsic::x86_avx2_permps;
1749     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1750       IID = Intrinsic::x86_avx2_permd;
1751     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1752       IID = Intrinsic::x86_avx512_permvar_df_256;
1753     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1754       IID = Intrinsic::x86_avx512_permvar_di_256;
1755     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1756       IID = Intrinsic::x86_avx512_permvar_sf_512;
1757     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1758       IID = Intrinsic::x86_avx512_permvar_si_512;
1759     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1760       IID = Intrinsic::x86_avx512_permvar_df_512;
1761     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1762       IID = Intrinsic::x86_avx512_permvar_di_512;
1763     else if (VecWidth == 128 && EltWidth == 16)
1764       IID = Intrinsic::x86_avx512_permvar_hi_128;
1765     else if (VecWidth == 256 && EltWidth == 16)
1766       IID = Intrinsic::x86_avx512_permvar_hi_256;
1767     else if (VecWidth == 512 && EltWidth == 16)
1768       IID = Intrinsic::x86_avx512_permvar_hi_512;
1769     else if (VecWidth == 128 && EltWidth == 8)
1770       IID = Intrinsic::x86_avx512_permvar_qi_128;
1771     else if (VecWidth == 256 && EltWidth == 8)
1772       IID = Intrinsic::x86_avx512_permvar_qi_256;
1773     else if (VecWidth == 512 && EltWidth == 8)
1774       IID = Intrinsic::x86_avx512_permvar_qi_512;
1775     else
1776       llvm_unreachable("Unexpected intrinsic");
1777   } else if (Name.startswith("dbpsadbw.")) {
1778     if (VecWidth == 128)
1779       IID = Intrinsic::x86_avx512_dbpsadbw_128;
1780     else if (VecWidth == 256)
1781       IID = Intrinsic::x86_avx512_dbpsadbw_256;
1782     else if (VecWidth == 512)
1783       IID = Intrinsic::x86_avx512_dbpsadbw_512;
1784     else
1785       llvm_unreachable("Unexpected intrinsic");
1786   } else if (Name.startswith("pmultishift.qb.")) {
1787     if (VecWidth == 128)
1788       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1789     else if (VecWidth == 256)
1790       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1791     else if (VecWidth == 512)
1792       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1793     else
1794       llvm_unreachable("Unexpected intrinsic");
1795   } else if (Name.startswith("conflict.")) {
1796     if (Name[9] == 'd' && VecWidth == 128)
1797       IID = Intrinsic::x86_avx512_conflict_d_128;
1798     else if (Name[9] == 'd' && VecWidth == 256)
1799       IID = Intrinsic::x86_avx512_conflict_d_256;
1800     else if (Name[9] == 'd' && VecWidth == 512)
1801       IID = Intrinsic::x86_avx512_conflict_d_512;
1802     else if (Name[9] == 'q' && VecWidth == 128)
1803       IID = Intrinsic::x86_avx512_conflict_q_128;
1804     else if (Name[9] == 'q' && VecWidth == 256)
1805       IID = Intrinsic::x86_avx512_conflict_q_256;
1806     else if (Name[9] == 'q' && VecWidth == 512)
1807       IID = Intrinsic::x86_avx512_conflict_q_512;
1808     else
1809       llvm_unreachable("Unexpected intrinsic");
1810   } else if (Name.startswith("pavg.")) {
1811     if (Name[5] == 'b' && VecWidth == 128)
1812       IID = Intrinsic::x86_sse2_pavg_b;
1813     else if (Name[5] == 'b' && VecWidth == 256)
1814       IID = Intrinsic::x86_avx2_pavg_b;
1815     else if (Name[5] == 'b' && VecWidth == 512)
1816       IID = Intrinsic::x86_avx512_pavg_b_512;
1817     else if (Name[5] == 'w' && VecWidth == 128)
1818       IID = Intrinsic::x86_sse2_pavg_w;
1819     else if (Name[5] == 'w' && VecWidth == 256)
1820       IID = Intrinsic::x86_avx2_pavg_w;
1821     else if (Name[5] == 'w' && VecWidth == 512)
1822       IID = Intrinsic::x86_avx512_pavg_w_512;
1823     else
1824       llvm_unreachable("Unexpected intrinsic");
1825   } else
1826     return false;
1827 
1828   SmallVector<Value *, 4> Args(CI.args());
1829   Args.pop_back();
1830   Args.pop_back();
1831   Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1832                            Args);
1833   unsigned NumArgs = CI.arg_size();
1834   Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1835                       CI.getArgOperand(NumArgs - 2));
1836   return true;
1837 }
1838 
1839 /// Upgrade comment in call to inline asm that represents an objc retain release
1840 /// marker.
1841 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1842   size_t Pos;
1843   if (AsmStr->find("mov\tfp") == 0 &&
1844       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1845       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1846     AsmStr->replace(Pos, 1, ";");
1847   }
1848 }
1849 
1850 static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
1851                                       IRBuilder<> &Builder) {
1852   if (Name == "mve.vctp64.old") {
1853     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
1854     // correct type.
1855     Value *VCTP = Builder.CreateCall(
1856         Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
1857         CI->getArgOperand(0), CI->getName());
1858     Value *C1 = Builder.CreateCall(
1859         Intrinsic::getDeclaration(
1860             F->getParent(), Intrinsic::arm_mve_pred_v2i,
1861             {VectorType::get(Builder.getInt1Ty(), 2, false)}),
1862         VCTP);
1863     return Builder.CreateCall(
1864         Intrinsic::getDeclaration(
1865             F->getParent(), Intrinsic::arm_mve_pred_i2v,
1866             {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1867         C1);
1868   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
1869              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
1870              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
1871              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
1872              Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
1873              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
1874              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
1875              Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
1876              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
1877              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
1878              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
1879              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
1880              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
1881              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
1882     std::vector<Type *> Tys;
1883     unsigned ID = CI->getIntrinsicID();
1884     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
1885     switch (ID) {
1886     case Intrinsic::arm_mve_mull_int_predicated:
1887     case Intrinsic::arm_mve_vqdmull_predicated:
1888     case Intrinsic::arm_mve_vldr_gather_base_predicated:
1889       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
1890       break;
1891     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
1892     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
1893     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
1894       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
1895              V2I1Ty};
1896       break;
1897     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
1898       Tys = {CI->getType(), CI->getOperand(0)->getType(),
1899              CI->getOperand(1)->getType(), V2I1Ty};
1900       break;
1901     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
1902       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
1903              CI->getOperand(2)->getType(), V2I1Ty};
1904       break;
1905     case Intrinsic::arm_cde_vcx1q_predicated:
1906     case Intrinsic::arm_cde_vcx1qa_predicated:
1907     case Intrinsic::arm_cde_vcx2q_predicated:
1908     case Intrinsic::arm_cde_vcx2qa_predicated:
1909     case Intrinsic::arm_cde_vcx3q_predicated:
1910     case Intrinsic::arm_cde_vcx3qa_predicated:
1911       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
1912       break;
1913     default:
1914       llvm_unreachable("Unhandled Intrinsic!");
1915     }
1916 
1917     std::vector<Value *> Ops;
1918     for (Value *Op : CI->args()) {
1919       Type *Ty = Op->getType();
1920       if (Ty->getScalarSizeInBits() == 1) {
1921         Value *C1 = Builder.CreateCall(
1922             Intrinsic::getDeclaration(
1923                 F->getParent(), Intrinsic::arm_mve_pred_v2i,
1924                 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
1925             Op);
1926         Op = Builder.CreateCall(
1927             Intrinsic::getDeclaration(F->getParent(),
1928                                       Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
1929             C1);
1930       }
1931       Ops.push_back(Op);
1932     }
1933 
1934     Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1935     return Builder.CreateCall(Fn, Ops, CI->getName());
1936   }
1937   llvm_unreachable("Unknown function for ARM CallBase upgrade.");
1938 }
1939 
1940 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1941 /// provided to seamlessly integrate with existing context.
1942 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
1943   Function *F = CI->getCalledFunction();
1944   LLVMContext &C = CI->getContext();
1945   IRBuilder<> Builder(C);
1946   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1947 
1948   assert(F && "Intrinsic call is not direct?");
1949 
1950   if (!NewFn) {
1951     // Get the Function's name.
1952     StringRef Name = F->getName();
1953 
1954     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1955     Name = Name.substr(5);
1956 
1957     bool IsX86 = Name.startswith("x86.");
1958     if (IsX86)
1959       Name = Name.substr(4);
1960     bool IsNVVM = Name.startswith("nvvm.");
1961     if (IsNVVM)
1962       Name = Name.substr(5);
1963     bool IsARM = Name.startswith("arm.");
1964     if (IsARM)
1965       Name = Name.substr(4);
1966 
1967     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1968       Module *M = F->getParent();
1969       SmallVector<Metadata *, 1> Elts;
1970       Elts.push_back(
1971           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1972       MDNode *Node = MDNode::get(C, Elts);
1973 
1974       Value *Arg0 = CI->getArgOperand(0);
1975       Value *Arg1 = CI->getArgOperand(1);
1976 
1977       // Nontemporal (unaligned) store of the 0'th element of the float/double
1978       // vector.
1979       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1980       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1981       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1982       Value *Extract =
1983           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1984 
1985       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1986       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1987 
1988       // Remove intrinsic.
1989       CI->eraseFromParent();
1990       return;
1991     }
1992 
1993     if (IsX86 && (Name.startswith("avx.movnt.") ||
1994                   Name.startswith("avx512.storent."))) {
1995       Module *M = F->getParent();
1996       SmallVector<Metadata *, 1> Elts;
1997       Elts.push_back(
1998           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1999       MDNode *Node = MDNode::get(C, Elts);
2000 
2001       Value *Arg0 = CI->getArgOperand(0);
2002       Value *Arg1 = CI->getArgOperand(1);
2003 
2004       // Convert the type of the pointer to a pointer to the stored type.
2005       Value *BC = Builder.CreateBitCast(Arg0,
2006                                         PointerType::getUnqual(Arg1->getType()),
2007                                         "cast");
2008       StoreInst *SI = Builder.CreateAlignedStore(
2009           Arg1, BC,
2010           Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
2011       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
2012 
2013       // Remove intrinsic.
2014       CI->eraseFromParent();
2015       return;
2016     }
2017 
2018     if (IsX86 && Name == "sse2.storel.dq") {
2019       Value *Arg0 = CI->getArgOperand(0);
2020       Value *Arg1 = CI->getArgOperand(1);
2021 
2022       auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2023       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2024       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2025       Value *BC = Builder.CreateBitCast(Arg0,
2026                                         PointerType::getUnqual(Elt->getType()),
2027                                         "cast");
2028       Builder.CreateAlignedStore(Elt, BC, Align(1));
2029 
2030       // Remove intrinsic.
2031       CI->eraseFromParent();
2032       return;
2033     }
2034 
2035     if (IsX86 && (Name.startswith("sse.storeu.") ||
2036                   Name.startswith("sse2.storeu.") ||
2037                   Name.startswith("avx.storeu."))) {
2038       Value *Arg0 = CI->getArgOperand(0);
2039       Value *Arg1 = CI->getArgOperand(1);
2040 
2041       Arg0 = Builder.CreateBitCast(Arg0,
2042                                    PointerType::getUnqual(Arg1->getType()),
2043                                    "cast");
2044       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2045 
2046       // Remove intrinsic.
2047       CI->eraseFromParent();
2048       return;
2049     }
2050 
2051     if (IsX86 && Name == "avx512.mask.store.ss") {
2052       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2053       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2054                          Mask, false);
2055 
2056       // Remove intrinsic.
2057       CI->eraseFromParent();
2058       return;
2059     }
2060 
2061     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
2062       // "avx512.mask.storeu." or "avx512.mask.store."
2063       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2064       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2065                          CI->getArgOperand(2), Aligned);
2066 
2067       // Remove intrinsic.
2068       CI->eraseFromParent();
2069       return;
2070     }
2071 
2072     Value *Rep;
2073     // Upgrade packed integer vector compare intrinsics to compare instructions.
2074     if (IsX86 && (Name.startswith("sse2.pcmp") ||
2075                   Name.startswith("avx2.pcmp"))) {
2076       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2077       bool CmpEq = Name[9] == 'e';
2078       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2079                                CI->getArgOperand(0), CI->getArgOperand(1));
2080       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2081     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
2082       Type *ExtTy = Type::getInt32Ty(C);
2083       if (CI->getOperand(0)->getType()->isIntegerTy(8))
2084         ExtTy = Type::getInt64Ty(C);
2085       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2086                          ExtTy->getPrimitiveSizeInBits();
2087       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2088       Rep = Builder.CreateVectorSplat(NumElts, Rep);
2089     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2090                          Name == "sse2.sqrt.sd")) {
2091       Value *Vec = CI->getArgOperand(0);
2092       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2093       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2094                                                  Intrinsic::sqrt, Elt0->getType());
2095       Elt0 = Builder.CreateCall(Intr, Elt0);
2096       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2097     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
2098                          Name.startswith("sse2.sqrt.p") ||
2099                          Name.startswith("sse.sqrt.p"))) {
2100       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2101                                                          Intrinsic::sqrt,
2102                                                          CI->getType()),
2103                                {CI->getArgOperand(0)});
2104     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
2105       if (CI->arg_size() == 4 &&
2106           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2107            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2108         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2109                                             : Intrinsic::x86_avx512_sqrt_pd_512;
2110 
2111         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2112         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2113                                                            IID), Args);
2114       } else {
2115         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2116                                                            Intrinsic::sqrt,
2117                                                            CI->getType()),
2118                                  {CI->getArgOperand(0)});
2119       }
2120       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2121                           CI->getArgOperand(1));
2122     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
2123                          Name.startswith("avx512.ptestnm"))) {
2124       Value *Op0 = CI->getArgOperand(0);
2125       Value *Op1 = CI->getArgOperand(1);
2126       Value *Mask = CI->getArgOperand(2);
2127       Rep = Builder.CreateAnd(Op0, Op1);
2128       llvm::Type *Ty = Op0->getType();
2129       Value *Zero = llvm::Constant::getNullValue(Ty);
2130       ICmpInst::Predicate Pred =
2131         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2132       Rep = Builder.CreateICmp(Pred, Rep, Zero);
2133       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
2134     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
2135       unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2136                              ->getNumElements();
2137       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2138       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2139                           CI->getArgOperand(1));
2140     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
2141       unsigned NumElts = CI->getType()->getScalarSizeInBits();
2142       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2143       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2144       int Indices[64];
2145       for (unsigned i = 0; i != NumElts; ++i)
2146         Indices[i] = i;
2147 
2148       // First extract half of each vector. This gives better codegen than
2149       // doing it in a single shuffle.
2150       LHS = Builder.CreateShuffleVector(LHS, LHS,
2151                                         makeArrayRef(Indices, NumElts / 2));
2152       RHS = Builder.CreateShuffleVector(RHS, RHS,
2153                                         makeArrayRef(Indices, NumElts / 2));
2154       // Concat the vectors.
2155       // NOTE: Operands have to be swapped to match intrinsic definition.
2156       Rep = Builder.CreateShuffleVector(RHS, LHS,
2157                                         makeArrayRef(Indices, NumElts));
2158       Rep = Builder.CreateBitCast(Rep, CI->getType());
2159     } else if (IsX86 && Name == "avx512.kand.w") {
2160       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2161       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2162       Rep = Builder.CreateAnd(LHS, RHS);
2163       Rep = Builder.CreateBitCast(Rep, CI->getType());
2164     } else if (IsX86 && Name == "avx512.kandn.w") {
2165       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2166       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2167       LHS = Builder.CreateNot(LHS);
2168       Rep = Builder.CreateAnd(LHS, RHS);
2169       Rep = Builder.CreateBitCast(Rep, CI->getType());
2170     } else if (IsX86 && Name == "avx512.kor.w") {
2171       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2172       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2173       Rep = Builder.CreateOr(LHS, RHS);
2174       Rep = Builder.CreateBitCast(Rep, CI->getType());
2175     } else if (IsX86 && Name == "avx512.kxor.w") {
2176       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2177       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2178       Rep = Builder.CreateXor(LHS, RHS);
2179       Rep = Builder.CreateBitCast(Rep, CI->getType());
2180     } else if (IsX86 && Name == "avx512.kxnor.w") {
2181       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2182       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2183       LHS = Builder.CreateNot(LHS);
2184       Rep = Builder.CreateXor(LHS, RHS);
2185       Rep = Builder.CreateBitCast(Rep, CI->getType());
2186     } else if (IsX86 && Name == "avx512.knot.w") {
2187       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2188       Rep = Builder.CreateNot(Rep);
2189       Rep = Builder.CreateBitCast(Rep, CI->getType());
2190     } else if (IsX86 &&
2191                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2192       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2193       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2194       Rep = Builder.CreateOr(LHS, RHS);
2195       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2196       Value *C;
2197       if (Name[14] == 'c')
2198         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2199       else
2200         C = ConstantInt::getNullValue(Builder.getInt16Ty());
2201       Rep = Builder.CreateICmpEQ(Rep, C);
2202       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2203     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2204                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2205                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2206                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2207       Type *I32Ty = Type::getInt32Ty(C);
2208       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2209                                                  ConstantInt::get(I32Ty, 0));
2210       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2211                                                  ConstantInt::get(I32Ty, 0));
2212       Value *EltOp;
2213       if (Name.contains(".add."))
2214         EltOp = Builder.CreateFAdd(Elt0, Elt1);
2215       else if (Name.contains(".sub."))
2216         EltOp = Builder.CreateFSub(Elt0, Elt1);
2217       else if (Name.contains(".mul."))
2218         EltOp = Builder.CreateFMul(Elt0, Elt1);
2219       else
2220         EltOp = Builder.CreateFDiv(Elt0, Elt1);
2221       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2222                                         ConstantInt::get(I32Ty, 0));
2223     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
2224       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2225       bool CmpEq = Name[16] == 'e';
2226       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2227     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
2228       Type *OpTy = CI->getArgOperand(0)->getType();
2229       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2230       Intrinsic::ID IID;
2231       switch (VecWidth) {
2232       default: llvm_unreachable("Unexpected intrinsic");
2233       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2234       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2235       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2236       }
2237 
2238       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2239                                { CI->getOperand(0), CI->getArgOperand(1) });
2240       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2241     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
2242       Type *OpTy = CI->getArgOperand(0)->getType();
2243       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2244       unsigned EltWidth = OpTy->getScalarSizeInBits();
2245       Intrinsic::ID IID;
2246       if (VecWidth == 128 && EltWidth == 32)
2247         IID = Intrinsic::x86_avx512_fpclass_ps_128;
2248       else if (VecWidth == 256 && EltWidth == 32)
2249         IID = Intrinsic::x86_avx512_fpclass_ps_256;
2250       else if (VecWidth == 512 && EltWidth == 32)
2251         IID = Intrinsic::x86_avx512_fpclass_ps_512;
2252       else if (VecWidth == 128 && EltWidth == 64)
2253         IID = Intrinsic::x86_avx512_fpclass_pd_128;
2254       else if (VecWidth == 256 && EltWidth == 64)
2255         IID = Intrinsic::x86_avx512_fpclass_pd_256;
2256       else if (VecWidth == 512 && EltWidth == 64)
2257         IID = Intrinsic::x86_avx512_fpclass_pd_512;
2258       else
2259         llvm_unreachable("Unexpected intrinsic");
2260 
2261       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2262                                { CI->getOperand(0), CI->getArgOperand(1) });
2263       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2264     } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
2265       SmallVector<Value *, 4> Args(CI->args());
2266       Type *OpTy = Args[0]->getType();
2267       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2268       unsigned EltWidth = OpTy->getScalarSizeInBits();
2269       Intrinsic::ID IID;
2270       if (VecWidth == 128 && EltWidth == 32)
2271         IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2272       else if (VecWidth == 256 && EltWidth == 32)
2273         IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2274       else if (VecWidth == 512 && EltWidth == 32)
2275         IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2276       else if (VecWidth == 128 && EltWidth == 64)
2277         IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2278       else if (VecWidth == 256 && EltWidth == 64)
2279         IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2280       else if (VecWidth == 512 && EltWidth == 64)
2281         IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2282       else
2283         llvm_unreachable("Unexpected intrinsic");
2284 
2285       Value *Mask = Constant::getAllOnesValue(CI->getType());
2286       if (VecWidth == 512)
2287         std::swap(Mask, Args.back());
2288       Args.push_back(Mask);
2289 
2290       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2291                                Args);
2292     } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
2293       // Integer compare intrinsics.
2294       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2295       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2296     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2297       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2298       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2299     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2300                          Name.startswith("avx512.cvtw2mask.") ||
2301                          Name.startswith("avx512.cvtd2mask.") ||
2302                          Name.startswith("avx512.cvtq2mask."))) {
2303       Value *Op = CI->getArgOperand(0);
2304       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2305       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2306       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2307     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2308                         Name == "ssse3.pabs.w.128" ||
2309                         Name == "ssse3.pabs.d.128" ||
2310                         Name.startswith("avx2.pabs") ||
2311                         Name.startswith("avx512.mask.pabs"))) {
2312       Rep = upgradeAbs(Builder, *CI);
2313     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2314                          Name == "sse2.pmaxs.w" ||
2315                          Name == "sse41.pmaxsd" ||
2316                          Name.startswith("avx2.pmaxs") ||
2317                          Name.startswith("avx512.mask.pmaxs"))) {
2318       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2319     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2320                          Name == "sse41.pmaxuw" ||
2321                          Name == "sse41.pmaxud" ||
2322                          Name.startswith("avx2.pmaxu") ||
2323                          Name.startswith("avx512.mask.pmaxu"))) {
2324       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2325     } else if (IsX86 && (Name == "sse41.pminsb" ||
2326                          Name == "sse2.pmins.w" ||
2327                          Name == "sse41.pminsd" ||
2328                          Name.startswith("avx2.pmins") ||
2329                          Name.startswith("avx512.mask.pmins"))) {
2330       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2331     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2332                          Name == "sse41.pminuw" ||
2333                          Name == "sse41.pminud" ||
2334                          Name.startswith("avx2.pminu") ||
2335                          Name.startswith("avx512.mask.pminu"))) {
2336       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2337     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2338                          Name == "avx2.pmulu.dq" ||
2339                          Name == "avx512.pmulu.dq.512" ||
2340                          Name.startswith("avx512.mask.pmulu.dq."))) {
2341       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2342     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2343                          Name == "avx2.pmul.dq" ||
2344                          Name == "avx512.pmul.dq.512" ||
2345                          Name.startswith("avx512.mask.pmul.dq."))) {
2346       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2347     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2348                          Name == "sse2.cvtsi2sd" ||
2349                          Name == "sse.cvtsi642ss" ||
2350                          Name == "sse2.cvtsi642sd")) {
2351       Rep = Builder.CreateSIToFP(
2352           CI->getArgOperand(1),
2353           cast<VectorType>(CI->getType())->getElementType());
2354       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2355     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2356       Rep = Builder.CreateUIToFP(
2357           CI->getArgOperand(1),
2358           cast<VectorType>(CI->getType())->getElementType());
2359       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2360     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2361       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2362       Rep = Builder.CreateFPExt(
2363           Rep, cast<VectorType>(CI->getType())->getElementType());
2364       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2365     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2366                          Name == "sse2.cvtdq2ps" ||
2367                          Name == "avx.cvtdq2.pd.256" ||
2368                          Name == "avx.cvtdq2.ps.256" ||
2369                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2370                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2371                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2372                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2373                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2374                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2375                          Name == "avx512.mask.cvtqq2ps.256" ||
2376                          Name == "avx512.mask.cvtqq2ps.512" ||
2377                          Name == "avx512.mask.cvtuqq2ps.256" ||
2378                          Name == "avx512.mask.cvtuqq2ps.512" ||
2379                          Name == "sse2.cvtps2pd" ||
2380                          Name == "avx.cvt.ps2.pd.256" ||
2381                          Name == "avx512.mask.cvtps2pd.128" ||
2382                          Name == "avx512.mask.cvtps2pd.256")) {
2383       auto *DstTy = cast<FixedVectorType>(CI->getType());
2384       Rep = CI->getArgOperand(0);
2385       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2386 
2387       unsigned NumDstElts = DstTy->getNumElements();
2388       if (NumDstElts < SrcTy->getNumElements()) {
2389         assert(NumDstElts == 2 && "Unexpected vector size");
2390         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2391       }
2392 
2393       bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2394       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2395       if (IsPS2PD)
2396         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2397       else if (CI->arg_size() == 4 &&
2398                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2399                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2400         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2401                                        : Intrinsic::x86_avx512_sitofp_round;
2402         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2403                                                 { DstTy, SrcTy });
2404         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2405       } else {
2406         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2407                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2408       }
2409 
2410       if (CI->arg_size() >= 3)
2411         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2412                             CI->getArgOperand(1));
2413     } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
2414                          Name.startswith("vcvtph2ps."))) {
2415       auto *DstTy = cast<FixedVectorType>(CI->getType());
2416       Rep = CI->getArgOperand(0);
2417       auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2418       unsigned NumDstElts = DstTy->getNumElements();
2419       if (NumDstElts != SrcTy->getNumElements()) {
2420         assert(NumDstElts == 4 && "Unexpected vector size");
2421         Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2422       }
2423       Rep = Builder.CreateBitCast(
2424           Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2425       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2426       if (CI->arg_size() >= 3)
2427         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2428                             CI->getArgOperand(1));
2429     } else if (IsX86 && Name.startswith("avx512.mask.load")) {
2430       // "avx512.mask.loadu." or "avx512.mask.load."
2431       bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2432       Rep =
2433           UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2434                             CI->getArgOperand(2), Aligned);
2435     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2436       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2437       Type *PtrTy = ResultTy->getElementType();
2438 
2439       // Cast the pointer to element type.
2440       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2441                                          llvm::PointerType::getUnqual(PtrTy));
2442 
2443       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2444                                      ResultTy->getNumElements());
2445 
2446       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2447                                                 Intrinsic::masked_expandload,
2448                                                 ResultTy);
2449       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2450     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2451       auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2452       Type *PtrTy = ResultTy->getElementType();
2453 
2454       // Cast the pointer to element type.
2455       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2456                                          llvm::PointerType::getUnqual(PtrTy));
2457 
2458       Value *MaskVec =
2459           getX86MaskVec(Builder, CI->getArgOperand(2),
2460                         cast<FixedVectorType>(ResultTy)->getNumElements());
2461 
2462       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2463                                                 Intrinsic::masked_compressstore,
2464                                                 ResultTy);
2465       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2466     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2467                          Name.startswith("avx512.mask.expand."))) {
2468       auto *ResultTy = cast<FixedVectorType>(CI->getType());
2469 
2470       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2471                                      ResultTy->getNumElements());
2472 
2473       bool IsCompress = Name[12] == 'c';
2474       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2475                                      : Intrinsic::x86_avx512_mask_expand;
2476       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2477       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2478                                        MaskVec });
2479     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2480       bool IsSigned;
2481       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2482           Name.endswith("uq"))
2483         IsSigned = false;
2484       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2485                Name.endswith("q"))
2486         IsSigned = true;
2487       else
2488         llvm_unreachable("Unknown suffix");
2489 
2490       unsigned Imm;
2491       if (CI->arg_size() == 3) {
2492         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2493       } else {
2494         Name = Name.substr(9); // strip off "xop.vpcom"
2495         if (Name.startswith("lt"))
2496           Imm = 0;
2497         else if (Name.startswith("le"))
2498           Imm = 1;
2499         else if (Name.startswith("gt"))
2500           Imm = 2;
2501         else if (Name.startswith("ge"))
2502           Imm = 3;
2503         else if (Name.startswith("eq"))
2504           Imm = 4;
2505         else if (Name.startswith("ne"))
2506           Imm = 5;
2507         else if (Name.startswith("false"))
2508           Imm = 6;
2509         else if (Name.startswith("true"))
2510           Imm = 7;
2511         else
2512           llvm_unreachable("Unknown condition");
2513       }
2514 
2515       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2516     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2517       Value *Sel = CI->getArgOperand(2);
2518       Value *NotSel = Builder.CreateNot(Sel);
2519       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2520       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2521       Rep = Builder.CreateOr(Sel0, Sel1);
2522     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2523                          Name.startswith("avx512.prol") ||
2524                          Name.startswith("avx512.mask.prol"))) {
2525       Rep = upgradeX86Rotate(Builder, *CI, false);
2526     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2527                          Name.startswith("avx512.mask.pror"))) {
2528       Rep = upgradeX86Rotate(Builder, *CI, true);
2529     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2530                          Name.startswith("avx512.mask.vpshld") ||
2531                          Name.startswith("avx512.maskz.vpshld"))) {
2532       bool ZeroMask = Name[11] == 'z';
2533       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2534     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2535                          Name.startswith("avx512.mask.vpshrd") ||
2536                          Name.startswith("avx512.maskz.vpshrd"))) {
2537       bool ZeroMask = Name[11] == 'z';
2538       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2539     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2540       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2541                                                Intrinsic::x86_sse42_crc32_32_8);
2542       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2543       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2544       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2545     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2546                          Name.startswith("avx512.vbroadcast.s"))) {
2547       // Replace broadcasts with a series of insertelements.
2548       auto *VecTy = cast<FixedVectorType>(CI->getType());
2549       Type *EltTy = VecTy->getElementType();
2550       unsigned EltNum = VecTy->getNumElements();
2551       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2552                                           EltTy->getPointerTo());
2553       Value *Load = Builder.CreateLoad(EltTy, Cast);
2554       Type *I32Ty = Type::getInt32Ty(C);
2555       Rep = PoisonValue::get(VecTy);
2556       for (unsigned I = 0; I < EltNum; ++I)
2557         Rep = Builder.CreateInsertElement(Rep, Load,
2558                                           ConstantInt::get(I32Ty, I));
2559     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2560                          Name.startswith("sse41.pmovzx") ||
2561                          Name.startswith("avx2.pmovsx") ||
2562                          Name.startswith("avx2.pmovzx") ||
2563                          Name.startswith("avx512.mask.pmovsx") ||
2564                          Name.startswith("avx512.mask.pmovzx"))) {
2565       auto *DstTy = cast<FixedVectorType>(CI->getType());
2566       unsigned NumDstElts = DstTy->getNumElements();
2567 
2568       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2569       SmallVector<int, 8> ShuffleMask(NumDstElts);
2570       for (unsigned i = 0; i != NumDstElts; ++i)
2571         ShuffleMask[i] = i;
2572 
2573       Value *SV =
2574           Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2575 
2576       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2577       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2578                    : Builder.CreateZExt(SV, DstTy);
2579       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2580       if (CI->arg_size() == 3)
2581         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2582                             CI->getArgOperand(1));
2583     } else if (Name == "avx512.mask.pmov.qd.256" ||
2584                Name == "avx512.mask.pmov.qd.512" ||
2585                Name == "avx512.mask.pmov.wb.256" ||
2586                Name == "avx512.mask.pmov.wb.512") {
2587       Type *Ty = CI->getArgOperand(1)->getType();
2588       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2589       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2590                           CI->getArgOperand(1));
2591     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2592                          Name == "avx2.vbroadcasti128")) {
2593       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2594       Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2595       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2596       auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2597       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2598                                             PointerType::getUnqual(VT));
2599       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2600       if (NumSrcElts == 2)
2601         Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2602       else
2603         Rep = Builder.CreateShuffleVector(
2604             Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2605     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2606                          Name.startswith("avx512.mask.shuf.f"))) {
2607       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2608       Type *VT = CI->getType();
2609       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2610       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2611       unsigned ControlBitsMask = NumLanes - 1;
2612       unsigned NumControlBits = NumLanes / 2;
2613       SmallVector<int, 8> ShuffleMask(0);
2614 
2615       for (unsigned l = 0; l != NumLanes; ++l) {
2616         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2617         // We actually need the other source.
2618         if (l >= NumLanes / 2)
2619           LaneMask += NumLanes;
2620         for (unsigned i = 0; i != NumElementsInLane; ++i)
2621           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2622       }
2623       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2624                                         CI->getArgOperand(1), ShuffleMask);
2625       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2626                           CI->getArgOperand(3));
2627     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2628                          Name.startswith("avx512.mask.broadcasti"))) {
2629       unsigned NumSrcElts =
2630           cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2631               ->getNumElements();
2632       unsigned NumDstElts =
2633           cast<FixedVectorType>(CI->getType())->getNumElements();
2634 
2635       SmallVector<int, 8> ShuffleMask(NumDstElts);
2636       for (unsigned i = 0; i != NumDstElts; ++i)
2637         ShuffleMask[i] = i % NumSrcElts;
2638 
2639       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2640                                         CI->getArgOperand(0),
2641                                         ShuffleMask);
2642       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2643                           CI->getArgOperand(1));
2644     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2645                          Name.startswith("avx2.vbroadcast") ||
2646                          Name.startswith("avx512.pbroadcast") ||
2647                          Name.startswith("avx512.mask.broadcast.s"))) {
2648       // Replace vp?broadcasts with a vector shuffle.
2649       Value *Op = CI->getArgOperand(0);
2650       ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2651       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2652       SmallVector<int, 8> M;
2653       ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2654       Rep = Builder.CreateShuffleVector(Op, M);
2655 
2656       if (CI->arg_size() == 3)
2657         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2658                             CI->getArgOperand(1));
2659     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2660                          Name.startswith("avx2.padds.") ||
2661                          Name.startswith("avx512.padds.") ||
2662                          Name.startswith("avx512.mask.padds."))) {
2663       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2664     } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
2665                          Name.startswith("avx2.psubs.") ||
2666                          Name.startswith("avx512.psubs.") ||
2667                          Name.startswith("avx512.mask.psubs."))) {
2668       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2669     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2670                          Name.startswith("avx2.paddus.") ||
2671                          Name.startswith("avx512.mask.paddus."))) {
2672       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2673     } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
2674                          Name.startswith("avx2.psubus.") ||
2675                          Name.startswith("avx512.mask.psubus."))) {
2676       Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2677     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2678       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2679                                       CI->getArgOperand(1),
2680                                       CI->getArgOperand(2),
2681                                       CI->getArgOperand(3),
2682                                       CI->getArgOperand(4),
2683                                       false);
2684     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2685       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2686                                       CI->getArgOperand(1),
2687                                       CI->getArgOperand(2),
2688                                       CI->getArgOperand(3),
2689                                       CI->getArgOperand(4),
2690                                       true);
2691     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2692                          Name == "avx2.psll.dq")) {
2693       // 128/256-bit shift left specified in bits.
2694       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2695       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2696                                        Shift / 8); // Shift is in bits.
2697     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2698                          Name == "avx2.psrl.dq")) {
2699       // 128/256-bit shift right specified in bits.
2700       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2701       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2702                                        Shift / 8); // Shift is in bits.
2703     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2704                          Name == "avx2.psll.dq.bs" ||
2705                          Name == "avx512.psll.dq.512")) {
2706       // 128/256/512-bit shift left specified in bytes.
2707       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2708       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2709     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2710                          Name == "avx2.psrl.dq.bs" ||
2711                          Name == "avx512.psrl.dq.512")) {
2712       // 128/256/512-bit shift right specified in bytes.
2713       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2714       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2715     } else if (IsX86 && (Name == "sse41.pblendw" ||
2716                          Name.startswith("sse41.blendp") ||
2717                          Name.startswith("avx.blend.p") ||
2718                          Name == "avx2.pblendw" ||
2719                          Name.startswith("avx2.pblendd."))) {
2720       Value *Op0 = CI->getArgOperand(0);
2721       Value *Op1 = CI->getArgOperand(1);
2722       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2723       auto *VecTy = cast<FixedVectorType>(CI->getType());
2724       unsigned NumElts = VecTy->getNumElements();
2725 
2726       SmallVector<int, 16> Idxs(NumElts);
2727       for (unsigned i = 0; i != NumElts; ++i)
2728         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2729 
2730       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2731     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2732                          Name == "avx2.vinserti128" ||
2733                          Name.startswith("avx512.mask.insert"))) {
2734       Value *Op0 = CI->getArgOperand(0);
2735       Value *Op1 = CI->getArgOperand(1);
2736       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2737       unsigned DstNumElts =
2738           cast<FixedVectorType>(CI->getType())->getNumElements();
2739       unsigned SrcNumElts =
2740           cast<FixedVectorType>(Op1->getType())->getNumElements();
2741       unsigned Scale = DstNumElts / SrcNumElts;
2742 
2743       // Mask off the high bits of the immediate value; hardware ignores those.
2744       Imm = Imm % Scale;
2745 
2746       // Extend the second operand into a vector the size of the destination.
2747       SmallVector<int, 8> Idxs(DstNumElts);
2748       for (unsigned i = 0; i != SrcNumElts; ++i)
2749         Idxs[i] = i;
2750       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2751         Idxs[i] = SrcNumElts;
2752       Rep = Builder.CreateShuffleVector(Op1, Idxs);
2753 
2754       // Insert the second operand into the first operand.
2755 
2756       // Note that there is no guarantee that instruction lowering will actually
2757       // produce a vinsertf128 instruction for the created shuffles. In
2758       // particular, the 0 immediate case involves no lane changes, so it can
2759       // be handled as a blend.
2760 
2761       // Example of shuffle mask for 32-bit elements:
2762       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2763       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2764 
2765       // First fill with identify mask.
2766       for (unsigned i = 0; i != DstNumElts; ++i)
2767         Idxs[i] = i;
2768       // Then replace the elements where we need to insert.
2769       for (unsigned i = 0; i != SrcNumElts; ++i)
2770         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2771       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2772 
2773       // If the intrinsic has a mask operand, handle that.
2774       if (CI->arg_size() == 5)
2775         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2776                             CI->getArgOperand(3));
2777     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2778                          Name == "avx2.vextracti128" ||
2779                          Name.startswith("avx512.mask.vextract"))) {
2780       Value *Op0 = CI->getArgOperand(0);
2781       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2782       unsigned DstNumElts =
2783           cast<FixedVectorType>(CI->getType())->getNumElements();
2784       unsigned SrcNumElts =
2785           cast<FixedVectorType>(Op0->getType())->getNumElements();
2786       unsigned Scale = SrcNumElts / DstNumElts;
2787 
2788       // Mask off the high bits of the immediate value; hardware ignores those.
2789       Imm = Imm % Scale;
2790 
2791       // Get indexes for the subvector of the input vector.
2792       SmallVector<int, 8> Idxs(DstNumElts);
2793       for (unsigned i = 0; i != DstNumElts; ++i) {
2794         Idxs[i] = i + (Imm * DstNumElts);
2795       }
2796       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2797 
2798       // If the intrinsic has a mask operand, handle that.
2799       if (CI->arg_size() == 4)
2800         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2801                             CI->getArgOperand(2));
2802     } else if (!IsX86 && Name == "stackprotectorcheck") {
2803       Rep = nullptr;
2804     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2805                          Name.startswith("avx512.mask.perm.di."))) {
2806       Value *Op0 = CI->getArgOperand(0);
2807       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2808       auto *VecTy = cast<FixedVectorType>(CI->getType());
2809       unsigned NumElts = VecTy->getNumElements();
2810 
2811       SmallVector<int, 8> Idxs(NumElts);
2812       for (unsigned i = 0; i != NumElts; ++i)
2813         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2814 
2815       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2816 
2817       if (CI->arg_size() == 4)
2818         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2819                             CI->getArgOperand(2));
2820     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2821                          Name == "avx2.vperm2i128")) {
2822       // The immediate permute control byte looks like this:
2823       //    [1:0] - select 128 bits from sources for low half of destination
2824       //    [2]   - ignore
2825       //    [3]   - zero low half of destination
2826       //    [5:4] - select 128 bits from sources for high half of destination
2827       //    [6]   - ignore
2828       //    [7]   - zero high half of destination
2829 
2830       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2831 
2832       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2833       unsigned HalfSize = NumElts / 2;
2834       SmallVector<int, 8> ShuffleMask(NumElts);
2835 
2836       // Determine which operand(s) are actually in use for this instruction.
2837       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2838       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2839 
2840       // If needed, replace operands based on zero mask.
2841       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2842       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2843 
2844       // Permute low half of result.
2845       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2846       for (unsigned i = 0; i < HalfSize; ++i)
2847         ShuffleMask[i] = StartIndex + i;
2848 
2849       // Permute high half of result.
2850       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2851       for (unsigned i = 0; i < HalfSize; ++i)
2852         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2853 
2854       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2855 
2856     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2857                          Name == "sse2.pshuf.d" ||
2858                          Name.startswith("avx512.mask.vpermil.p") ||
2859                          Name.startswith("avx512.mask.pshuf.d."))) {
2860       Value *Op0 = CI->getArgOperand(0);
2861       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2862       auto *VecTy = cast<FixedVectorType>(CI->getType());
2863       unsigned NumElts = VecTy->getNumElements();
2864       // Calculate the size of each index in the immediate.
2865       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2866       unsigned IdxMask = ((1 << IdxSize) - 1);
2867 
2868       SmallVector<int, 8> Idxs(NumElts);
2869       // Lookup the bits for this element, wrapping around the immediate every
2870       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2871       // to offset by the first index of each group.
2872       for (unsigned i = 0; i != NumElts; ++i)
2873         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2874 
2875       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2876 
2877       if (CI->arg_size() == 4)
2878         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2879                             CI->getArgOperand(2));
2880     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2881                          Name.startswith("avx512.mask.pshufl.w."))) {
2882       Value *Op0 = CI->getArgOperand(0);
2883       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2884       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2885 
2886       SmallVector<int, 16> Idxs(NumElts);
2887       for (unsigned l = 0; l != NumElts; l += 8) {
2888         for (unsigned i = 0; i != 4; ++i)
2889           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2890         for (unsigned i = 4; i != 8; ++i)
2891           Idxs[i + l] = i + l;
2892       }
2893 
2894       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2895 
2896       if (CI->arg_size() == 4)
2897         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2898                             CI->getArgOperand(2));
2899     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2900                          Name.startswith("avx512.mask.pshufh.w."))) {
2901       Value *Op0 = CI->getArgOperand(0);
2902       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2903       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2904 
2905       SmallVector<int, 16> Idxs(NumElts);
2906       for (unsigned l = 0; l != NumElts; l += 8) {
2907         for (unsigned i = 0; i != 4; ++i)
2908           Idxs[i + l] = i + l;
2909         for (unsigned i = 0; i != 4; ++i)
2910           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2911       }
2912 
2913       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2914 
2915       if (CI->arg_size() == 4)
2916         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2917                             CI->getArgOperand(2));
2918     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2919       Value *Op0 = CI->getArgOperand(0);
2920       Value *Op1 = CI->getArgOperand(1);
2921       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2922       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2923 
2924       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2925       unsigned HalfLaneElts = NumLaneElts / 2;
2926 
2927       SmallVector<int, 16> Idxs(NumElts);
2928       for (unsigned i = 0; i != NumElts; ++i) {
2929         // Base index is the starting element of the lane.
2930         Idxs[i] = i - (i % NumLaneElts);
2931         // If we are half way through the lane switch to the other source.
2932         if ((i % NumLaneElts) >= HalfLaneElts)
2933           Idxs[i] += NumElts;
2934         // Now select the specific element. By adding HalfLaneElts bits from
2935         // the immediate. Wrapping around the immediate every 8-bits.
2936         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2937       }
2938 
2939       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940 
2941       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2942                           CI->getArgOperand(3));
2943     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2944                          Name.startswith("avx512.mask.movshdup") ||
2945                          Name.startswith("avx512.mask.movsldup"))) {
2946       Value *Op0 = CI->getArgOperand(0);
2947       unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2948       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2949 
2950       unsigned Offset = 0;
2951       if (Name.startswith("avx512.mask.movshdup."))
2952         Offset = 1;
2953 
2954       SmallVector<int, 16> Idxs(NumElts);
2955       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2956         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2957           Idxs[i + l + 0] = i + l + Offset;
2958           Idxs[i + l + 1] = i + l + Offset;
2959         }
2960 
2961       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2962 
2963       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2964                           CI->getArgOperand(1));
2965     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2966                          Name.startswith("avx512.mask.unpckl."))) {
2967       Value *Op0 = CI->getArgOperand(0);
2968       Value *Op1 = CI->getArgOperand(1);
2969       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2970       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2971 
2972       SmallVector<int, 64> Idxs(NumElts);
2973       for (int l = 0; l != NumElts; l += NumLaneElts)
2974         for (int i = 0; i != NumLaneElts; ++i)
2975           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2976 
2977       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2978 
2979       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2980                           CI->getArgOperand(2));
2981     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2982                          Name.startswith("avx512.mask.unpckh."))) {
2983       Value *Op0 = CI->getArgOperand(0);
2984       Value *Op1 = CI->getArgOperand(1);
2985       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
2986       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2987 
2988       SmallVector<int, 64> Idxs(NumElts);
2989       for (int l = 0; l != NumElts; l += NumLaneElts)
2990         for (int i = 0; i != NumLaneElts; ++i)
2991           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2992 
2993       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2994 
2995       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2996                           CI->getArgOperand(2));
2997     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2998                          Name.startswith("avx512.mask.pand."))) {
2999       VectorType *FTy = cast<VectorType>(CI->getType());
3000       VectorType *ITy = VectorType::getInteger(FTy);
3001       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3002                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3003       Rep = Builder.CreateBitCast(Rep, FTy);
3004       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3005                           CI->getArgOperand(2));
3006     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
3007                          Name.startswith("avx512.mask.pandn."))) {
3008       VectorType *FTy = cast<VectorType>(CI->getType());
3009       VectorType *ITy = VectorType::getInteger(FTy);
3010       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3011       Rep = Builder.CreateAnd(Rep,
3012                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3013       Rep = Builder.CreateBitCast(Rep, FTy);
3014       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3015                           CI->getArgOperand(2));
3016     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
3017                          Name.startswith("avx512.mask.por."))) {
3018       VectorType *FTy = cast<VectorType>(CI->getType());
3019       VectorType *ITy = VectorType::getInteger(FTy);
3020       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3021                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3022       Rep = Builder.CreateBitCast(Rep, FTy);
3023       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3024                           CI->getArgOperand(2));
3025     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
3026                          Name.startswith("avx512.mask.pxor."))) {
3027       VectorType *FTy = cast<VectorType>(CI->getType());
3028       VectorType *ITy = VectorType::getInteger(FTy);
3029       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3030                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3031       Rep = Builder.CreateBitCast(Rep, FTy);
3032       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3033                           CI->getArgOperand(2));
3034     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
3035       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3036       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3037                           CI->getArgOperand(2));
3038     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
3039       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3040       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3041                           CI->getArgOperand(2));
3042     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
3043       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3044       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3045                           CI->getArgOperand(2));
3046     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
3047       if (Name.endswith(".512")) {
3048         Intrinsic::ID IID;
3049         if (Name[17] == 's')
3050           IID = Intrinsic::x86_avx512_add_ps_512;
3051         else
3052           IID = Intrinsic::x86_avx512_add_pd_512;
3053 
3054         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3055                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3056                                    CI->getArgOperand(4) });
3057       } else {
3058         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3059       }
3060       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3061                           CI->getArgOperand(2));
3062     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
3063       if (Name.endswith(".512")) {
3064         Intrinsic::ID IID;
3065         if (Name[17] == 's')
3066           IID = Intrinsic::x86_avx512_div_ps_512;
3067         else
3068           IID = Intrinsic::x86_avx512_div_pd_512;
3069 
3070         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3071                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3072                                    CI->getArgOperand(4) });
3073       } else {
3074         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3075       }
3076       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3077                           CI->getArgOperand(2));
3078     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
3079       if (Name.endswith(".512")) {
3080         Intrinsic::ID IID;
3081         if (Name[17] == 's')
3082           IID = Intrinsic::x86_avx512_mul_ps_512;
3083         else
3084           IID = Intrinsic::x86_avx512_mul_pd_512;
3085 
3086         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3087                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3088                                    CI->getArgOperand(4) });
3089       } else {
3090         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3091       }
3092       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3093                           CI->getArgOperand(2));
3094     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
3095       if (Name.endswith(".512")) {
3096         Intrinsic::ID IID;
3097         if (Name[17] == 's')
3098           IID = Intrinsic::x86_avx512_sub_ps_512;
3099         else
3100           IID = Intrinsic::x86_avx512_sub_pd_512;
3101 
3102         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3103                                  { CI->getArgOperand(0), CI->getArgOperand(1),
3104                                    CI->getArgOperand(4) });
3105       } else {
3106         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3107       }
3108       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3109                           CI->getArgOperand(2));
3110     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
3111                          Name.startswith("avx512.mask.min.p")) &&
3112                Name.drop_front(18) == ".512") {
3113       bool IsDouble = Name[17] == 'd';
3114       bool IsMin = Name[13] == 'i';
3115       static const Intrinsic::ID MinMaxTbl[2][2] = {
3116         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3117         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3118       };
3119       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3120 
3121       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3122                                { CI->getArgOperand(0), CI->getArgOperand(1),
3123                                  CI->getArgOperand(4) });
3124       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
3125                           CI->getArgOperand(2));
3126     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
3127       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3128                                                          Intrinsic::ctlz,
3129                                                          CI->getType()),
3130                                { CI->getArgOperand(0), Builder.getInt1(false) });
3131       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
3132                           CI->getArgOperand(1));
3133     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
3134       bool IsImmediate = Name[16] == 'i' ||
3135                          (Name.size() > 18 && Name[18] == 'i');
3136       bool IsVariable = Name[16] == 'v';
3137       char Size = Name[16] == '.' ? Name[17] :
3138                   Name[17] == '.' ? Name[18] :
3139                   Name[18] == '.' ? Name[19] :
3140                                     Name[20];
3141 
3142       Intrinsic::ID IID;
3143       if (IsVariable && Name[17] != '.') {
3144         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3145           IID = Intrinsic::x86_avx2_psllv_q;
3146         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3147           IID = Intrinsic::x86_avx2_psllv_q_256;
3148         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3149           IID = Intrinsic::x86_avx2_psllv_d;
3150         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3151           IID = Intrinsic::x86_avx2_psllv_d_256;
3152         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3153           IID = Intrinsic::x86_avx512_psllv_w_128;
3154         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3155           IID = Intrinsic::x86_avx512_psllv_w_256;
3156         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3157           IID = Intrinsic::x86_avx512_psllv_w_512;
3158         else
3159           llvm_unreachable("Unexpected size");
3160       } else if (Name.endswith(".128")) {
3161         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3162           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3163                             : Intrinsic::x86_sse2_psll_d;
3164         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3165           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3166                             : Intrinsic::x86_sse2_psll_q;
3167         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3168           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3169                             : Intrinsic::x86_sse2_psll_w;
3170         else
3171           llvm_unreachable("Unexpected size");
3172       } else if (Name.endswith(".256")) {
3173         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3174           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3175                             : Intrinsic::x86_avx2_psll_d;
3176         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3177           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3178                             : Intrinsic::x86_avx2_psll_q;
3179         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3180           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3181                             : Intrinsic::x86_avx2_psll_w;
3182         else
3183           llvm_unreachable("Unexpected size");
3184       } else {
3185         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3186           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3187                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
3188                               Intrinsic::x86_avx512_psll_d_512;
3189         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3190           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3191                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
3192                               Intrinsic::x86_avx512_psll_q_512;
3193         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3194           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3195                             : Intrinsic::x86_avx512_psll_w_512;
3196         else
3197           llvm_unreachable("Unexpected size");
3198       }
3199 
3200       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3201     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
3202       bool IsImmediate = Name[16] == 'i' ||
3203                          (Name.size() > 18 && Name[18] == 'i');
3204       bool IsVariable = Name[16] == 'v';
3205       char Size = Name[16] == '.' ? Name[17] :
3206                   Name[17] == '.' ? Name[18] :
3207                   Name[18] == '.' ? Name[19] :
3208                                     Name[20];
3209 
3210       Intrinsic::ID IID;
3211       if (IsVariable && Name[17] != '.') {
3212         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3213           IID = Intrinsic::x86_avx2_psrlv_q;
3214         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3215           IID = Intrinsic::x86_avx2_psrlv_q_256;
3216         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3217           IID = Intrinsic::x86_avx2_psrlv_d;
3218         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3219           IID = Intrinsic::x86_avx2_psrlv_d_256;
3220         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3221           IID = Intrinsic::x86_avx512_psrlv_w_128;
3222         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3223           IID = Intrinsic::x86_avx512_psrlv_w_256;
3224         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3225           IID = Intrinsic::x86_avx512_psrlv_w_512;
3226         else
3227           llvm_unreachable("Unexpected size");
3228       } else if (Name.endswith(".128")) {
3229         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3230           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3231                             : Intrinsic::x86_sse2_psrl_d;
3232         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3233           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3234                             : Intrinsic::x86_sse2_psrl_q;
3235         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3236           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3237                             : Intrinsic::x86_sse2_psrl_w;
3238         else
3239           llvm_unreachable("Unexpected size");
3240       } else if (Name.endswith(".256")) {
3241         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3242           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3243                             : Intrinsic::x86_avx2_psrl_d;
3244         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3245           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3246                             : Intrinsic::x86_avx2_psrl_q;
3247         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3248           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3249                             : Intrinsic::x86_avx2_psrl_w;
3250         else
3251           llvm_unreachable("Unexpected size");
3252       } else {
3253         if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3254           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3255                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
3256                               Intrinsic::x86_avx512_psrl_d_512;
3257         else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3258           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3259                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
3260                               Intrinsic::x86_avx512_psrl_q_512;
3261         else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3262           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3263                             : Intrinsic::x86_avx512_psrl_w_512;
3264         else
3265           llvm_unreachable("Unexpected size");
3266       }
3267 
3268       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3269     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
3270       bool IsImmediate = Name[16] == 'i' ||
3271                          (Name.size() > 18 && Name[18] == 'i');
3272       bool IsVariable = Name[16] == 'v';
3273       char Size = Name[16] == '.' ? Name[17] :
3274                   Name[17] == '.' ? Name[18] :
3275                   Name[18] == '.' ? Name[19] :
3276                                     Name[20];
3277 
3278       Intrinsic::ID IID;
3279       if (IsVariable && Name[17] != '.') {
3280         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3281           IID = Intrinsic::x86_avx2_psrav_d;
3282         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3283           IID = Intrinsic::x86_avx2_psrav_d_256;
3284         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3285           IID = Intrinsic::x86_avx512_psrav_w_128;
3286         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3287           IID = Intrinsic::x86_avx512_psrav_w_256;
3288         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3289           IID = Intrinsic::x86_avx512_psrav_w_512;
3290         else
3291           llvm_unreachable("Unexpected size");
3292       } else if (Name.endswith(".128")) {
3293         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3294           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3295                             : Intrinsic::x86_sse2_psra_d;
3296         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3297           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3298                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3299                               Intrinsic::x86_avx512_psra_q_128;
3300         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3301           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3302                             : Intrinsic::x86_sse2_psra_w;
3303         else
3304           llvm_unreachable("Unexpected size");
3305       } else if (Name.endswith(".256")) {
3306         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3307           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3308                             : Intrinsic::x86_avx2_psra_d;
3309         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3310           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3311                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3312                               Intrinsic::x86_avx512_psra_q_256;
3313         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3314           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3315                             : Intrinsic::x86_avx2_psra_w;
3316         else
3317           llvm_unreachable("Unexpected size");
3318       } else {
3319         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3320           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3321                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3322                               Intrinsic::x86_avx512_psra_d_512;
3323         else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3324           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3325                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3326                               Intrinsic::x86_avx512_psra_q_512;
3327         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3328           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3329                             : Intrinsic::x86_avx512_psra_w_512;
3330         else
3331           llvm_unreachable("Unexpected size");
3332       }
3333 
3334       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3335     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3336       Rep = upgradeMaskedMove(Builder, *CI);
3337     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3338       Rep = UpgradeMaskToInt(Builder, *CI);
3339     } else if (IsX86 && Name.endswith(".movntdqa")) {
3340       Module *M = F->getParent();
3341       MDNode *Node = MDNode::get(
3342           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3343 
3344       Value *Ptr = CI->getArgOperand(0);
3345 
3346       // Convert the type of the pointer to a pointer to the stored type.
3347       Value *BC = Builder.CreateBitCast(
3348           Ptr, PointerType::getUnqual(CI->getType()), "cast");
3349       LoadInst *LI = Builder.CreateAlignedLoad(
3350           CI->getType(), BC,
3351           Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
3352       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3353       Rep = LI;
3354     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3355                          Name.startswith("fma.vfmsub.") ||
3356                          Name.startswith("fma.vfnmadd.") ||
3357                          Name.startswith("fma.vfnmsub."))) {
3358       bool NegMul = Name[6] == 'n';
3359       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3360       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3361 
3362       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3363                        CI->getArgOperand(2) };
3364 
3365       if (IsScalar) {
3366         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3367         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3368         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3369       }
3370 
3371       if (NegMul && !IsScalar)
3372         Ops[0] = Builder.CreateFNeg(Ops[0]);
3373       if (NegMul && IsScalar)
3374         Ops[1] = Builder.CreateFNeg(Ops[1]);
3375       if (NegAcc)
3376         Ops[2] = Builder.CreateFNeg(Ops[2]);
3377 
3378       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3379                                                          Intrinsic::fma,
3380                                                          Ops[0]->getType()),
3381                                Ops);
3382 
3383       if (IsScalar)
3384         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3385                                           (uint64_t)0);
3386     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3387       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3388                        CI->getArgOperand(2) };
3389 
3390       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3391       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3392       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3393 
3394       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3395                                                          Intrinsic::fma,
3396                                                          Ops[0]->getType()),
3397                                Ops);
3398 
3399       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3400                                         Rep, (uint64_t)0);
3401     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3402                          Name.startswith("avx512.maskz.vfmadd.s") ||
3403                          Name.startswith("avx512.mask3.vfmadd.s") ||
3404                          Name.startswith("avx512.mask3.vfmsub.s") ||
3405                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3406       bool IsMask3 = Name[11] == '3';
3407       bool IsMaskZ = Name[11] == 'z';
3408       // Drop the "avx512.mask." to make it easier.
3409       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3410       bool NegMul = Name[2] == 'n';
3411       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3412 
3413       Value *A = CI->getArgOperand(0);
3414       Value *B = CI->getArgOperand(1);
3415       Value *C = CI->getArgOperand(2);
3416 
3417       if (NegMul && (IsMask3 || IsMaskZ))
3418         A = Builder.CreateFNeg(A);
3419       if (NegMul && !(IsMask3 || IsMaskZ))
3420         B = Builder.CreateFNeg(B);
3421       if (NegAcc)
3422         C = Builder.CreateFNeg(C);
3423 
3424       A = Builder.CreateExtractElement(A, (uint64_t)0);
3425       B = Builder.CreateExtractElement(B, (uint64_t)0);
3426       C = Builder.CreateExtractElement(C, (uint64_t)0);
3427 
3428       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3429           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3430         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3431 
3432         Intrinsic::ID IID;
3433         if (Name.back() == 'd')
3434           IID = Intrinsic::x86_avx512_vfmadd_f64;
3435         else
3436           IID = Intrinsic::x86_avx512_vfmadd_f32;
3437         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3438         Rep = Builder.CreateCall(FMA, Ops);
3439       } else {
3440         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3441                                                   Intrinsic::fma,
3442                                                   A->getType());
3443         Rep = Builder.CreateCall(FMA, { A, B, C });
3444       }
3445 
3446       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3447                         IsMask3 ? C : A;
3448 
3449       // For Mask3 with NegAcc, we need to create a new extractelement that
3450       // avoids the negation above.
3451       if (NegAcc && IsMask3)
3452         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3453                                                 (uint64_t)0);
3454 
3455       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3456                                 Rep, PassThru);
3457       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3458                                         Rep, (uint64_t)0);
3459     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3460                          Name.startswith("avx512.mask.vfnmadd.p") ||
3461                          Name.startswith("avx512.mask.vfnmsub.p") ||
3462                          Name.startswith("avx512.mask3.vfmadd.p") ||
3463                          Name.startswith("avx512.mask3.vfmsub.p") ||
3464                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3465                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3466       bool IsMask3 = Name[11] == '3';
3467       bool IsMaskZ = Name[11] == 'z';
3468       // Drop the "avx512.mask." to make it easier.
3469       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3470       bool NegMul = Name[2] == 'n';
3471       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3472 
3473       Value *A = CI->getArgOperand(0);
3474       Value *B = CI->getArgOperand(1);
3475       Value *C = CI->getArgOperand(2);
3476 
3477       if (NegMul && (IsMask3 || IsMaskZ))
3478         A = Builder.CreateFNeg(A);
3479       if (NegMul && !(IsMask3 || IsMaskZ))
3480         B = Builder.CreateFNeg(B);
3481       if (NegAcc)
3482         C = Builder.CreateFNeg(C);
3483 
3484       if (CI->arg_size() == 5 &&
3485           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3486            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3487         Intrinsic::ID IID;
3488         // Check the character before ".512" in string.
3489         if (Name[Name.size()-5] == 's')
3490           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3491         else
3492           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3493 
3494         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3495                                  { A, B, C, CI->getArgOperand(4) });
3496       } else {
3497         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3498                                                   Intrinsic::fma,
3499                                                   A->getType());
3500         Rep = Builder.CreateCall(FMA, { A, B, C });
3501       }
3502 
3503       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3504                         IsMask3 ? CI->getArgOperand(2) :
3505                                   CI->getArgOperand(0);
3506 
3507       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3508     } else if (IsX86 &&  Name.startswith("fma.vfmsubadd.p")) {
3509       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3510       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3511       Intrinsic::ID IID;
3512       if (VecWidth == 128 && EltWidth == 32)
3513         IID = Intrinsic::x86_fma_vfmaddsub_ps;
3514       else if (VecWidth == 256 && EltWidth == 32)
3515         IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3516       else if (VecWidth == 128 && EltWidth == 64)
3517         IID = Intrinsic::x86_fma_vfmaddsub_pd;
3518       else if (VecWidth == 256 && EltWidth == 64)
3519         IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3520       else
3521         llvm_unreachable("Unexpected intrinsic");
3522 
3523       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3524                        CI->getArgOperand(2) };
3525       Ops[2] = Builder.CreateFNeg(Ops[2]);
3526       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3527                                Ops);
3528     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3529                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3530                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3531                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3532       bool IsMask3 = Name[11] == '3';
3533       bool IsMaskZ = Name[11] == 'z';
3534       // Drop the "avx512.mask." to make it easier.
3535       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3536       bool IsSubAdd = Name[3] == 's';
3537       if (CI->arg_size() == 5) {
3538         Intrinsic::ID IID;
3539         // Check the character before ".512" in string.
3540         if (Name[Name.size()-5] == 's')
3541           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3542         else
3543           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3544 
3545         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3546                          CI->getArgOperand(2), CI->getArgOperand(4) };
3547         if (IsSubAdd)
3548           Ops[2] = Builder.CreateFNeg(Ops[2]);
3549 
3550         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3551                                  Ops);
3552       } else {
3553         int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3554 
3555         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3556                          CI->getArgOperand(2) };
3557 
3558         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3559                                                   Ops[0]->getType());
3560         Value *Odd = Builder.CreateCall(FMA, Ops);
3561         Ops[2] = Builder.CreateFNeg(Ops[2]);
3562         Value *Even = Builder.CreateCall(FMA, Ops);
3563 
3564         if (IsSubAdd)
3565           std::swap(Even, Odd);
3566 
3567         SmallVector<int, 32> Idxs(NumElts);
3568         for (int i = 0; i != NumElts; ++i)
3569           Idxs[i] = i + (i % 2) * NumElts;
3570 
3571         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3572       }
3573 
3574       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3575                         IsMask3 ? CI->getArgOperand(2) :
3576                                   CI->getArgOperand(0);
3577 
3578       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3579     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3580                          Name.startswith("avx512.maskz.pternlog."))) {
3581       bool ZeroMask = Name[11] == 'z';
3582       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3583       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3584       Intrinsic::ID IID;
3585       if (VecWidth == 128 && EltWidth == 32)
3586         IID = Intrinsic::x86_avx512_pternlog_d_128;
3587       else if (VecWidth == 256 && EltWidth == 32)
3588         IID = Intrinsic::x86_avx512_pternlog_d_256;
3589       else if (VecWidth == 512 && EltWidth == 32)
3590         IID = Intrinsic::x86_avx512_pternlog_d_512;
3591       else if (VecWidth == 128 && EltWidth == 64)
3592         IID = Intrinsic::x86_avx512_pternlog_q_128;
3593       else if (VecWidth == 256 && EltWidth == 64)
3594         IID = Intrinsic::x86_avx512_pternlog_q_256;
3595       else if (VecWidth == 512 && EltWidth == 64)
3596         IID = Intrinsic::x86_avx512_pternlog_q_512;
3597       else
3598         llvm_unreachable("Unexpected intrinsic");
3599 
3600       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3601                         CI->getArgOperand(2), CI->getArgOperand(3) };
3602       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3603                                Args);
3604       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3605                                  : CI->getArgOperand(0);
3606       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3607     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3608                          Name.startswith("avx512.maskz.vpmadd52"))) {
3609       bool ZeroMask = Name[11] == 'z';
3610       bool High = Name[20] == 'h' || Name[21] == 'h';
3611       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3612       Intrinsic::ID IID;
3613       if (VecWidth == 128 && !High)
3614         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3615       else if (VecWidth == 256 && !High)
3616         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3617       else if (VecWidth == 512 && !High)
3618         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3619       else if (VecWidth == 128 && High)
3620         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3621       else if (VecWidth == 256 && High)
3622         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3623       else if (VecWidth == 512 && High)
3624         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3625       else
3626         llvm_unreachable("Unexpected intrinsic");
3627 
3628       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3629                         CI->getArgOperand(2) };
3630       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3631                                Args);
3632       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3633                                  : CI->getArgOperand(0);
3634       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3635     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3636                          Name.startswith("avx512.mask.vpermt2var.") ||
3637                          Name.startswith("avx512.maskz.vpermt2var."))) {
3638       bool ZeroMask = Name[11] == 'z';
3639       bool IndexForm = Name[17] == 'i';
3640       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3641     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3642                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3643                          Name.startswith("avx512.mask.vpdpbusds.") ||
3644                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3645       bool ZeroMask = Name[11] == 'z';
3646       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3647       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3648       Intrinsic::ID IID;
3649       if (VecWidth == 128 && !IsSaturating)
3650         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3651       else if (VecWidth == 256 && !IsSaturating)
3652         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3653       else if (VecWidth == 512 && !IsSaturating)
3654         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3655       else if (VecWidth == 128 && IsSaturating)
3656         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3657       else if (VecWidth == 256 && IsSaturating)
3658         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3659       else if (VecWidth == 512 && IsSaturating)
3660         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3661       else
3662         llvm_unreachable("Unexpected intrinsic");
3663 
3664       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3665                         CI->getArgOperand(2)  };
3666       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3667                                Args);
3668       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3669                                  : CI->getArgOperand(0);
3670       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3671     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3672                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3673                          Name.startswith("avx512.mask.vpdpwssds.") ||
3674                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3675       bool ZeroMask = Name[11] == 'z';
3676       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3677       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3678       Intrinsic::ID IID;
3679       if (VecWidth == 128 && !IsSaturating)
3680         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3681       else if (VecWidth == 256 && !IsSaturating)
3682         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3683       else if (VecWidth == 512 && !IsSaturating)
3684         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3685       else if (VecWidth == 128 && IsSaturating)
3686         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3687       else if (VecWidth == 256 && IsSaturating)
3688         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3689       else if (VecWidth == 512 && IsSaturating)
3690         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3691       else
3692         llvm_unreachable("Unexpected intrinsic");
3693 
3694       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3695                         CI->getArgOperand(2)  };
3696       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3697                                Args);
3698       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3699                                  : CI->getArgOperand(0);
3700       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3701     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3702                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3703                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3704       Intrinsic::ID IID;
3705       if (Name[0] == 'a' && Name.back() == '2')
3706         IID = Intrinsic::x86_addcarry_32;
3707       else if (Name[0] == 'a' && Name.back() == '4')
3708         IID = Intrinsic::x86_addcarry_64;
3709       else if (Name[0] == 's' && Name.back() == '2')
3710         IID = Intrinsic::x86_subborrow_32;
3711       else if (Name[0] == 's' && Name.back() == '4')
3712         IID = Intrinsic::x86_subborrow_64;
3713       else
3714         llvm_unreachable("Unexpected intrinsic");
3715 
3716       // Make a call with 3 operands.
3717       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3718                         CI->getArgOperand(2)};
3719       Value *NewCall = Builder.CreateCall(
3720                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3721                                 Args);
3722 
3723       // Extract the second result and store it.
3724       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3725       // Cast the pointer to the right type.
3726       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3727                                  llvm::PointerType::getUnqual(Data->getType()));
3728       Builder.CreateAlignedStore(Data, Ptr, Align(1));
3729       // Replace the original call result with the first result of the new call.
3730       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3731 
3732       CI->replaceAllUsesWith(CF);
3733       Rep = nullptr;
3734     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3735                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3736       // Rep will be updated by the call in the condition.
3737     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3738       Value *Arg = CI->getArgOperand(0);
3739       Value *Neg = Builder.CreateNeg(Arg, "neg");
3740       Value *Cmp = Builder.CreateICmpSGE(
3741           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3742       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3743     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3744                           Name.startswith("atomic.load.add.f64.p"))) {
3745       Value *Ptr = CI->getArgOperand(0);
3746       Value *Val = CI->getArgOperand(1);
3747       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
3748                                     AtomicOrdering::SequentiallyConsistent);
3749     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3750                           Name == "max.ui" || Name == "max.ull")) {
3751       Value *Arg0 = CI->getArgOperand(0);
3752       Value *Arg1 = CI->getArgOperand(1);
3753       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3754                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3755                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3756       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3757     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3758                           Name == "min.ui" || Name == "min.ull")) {
3759       Value *Arg0 = CI->getArgOperand(0);
3760       Value *Arg1 = CI->getArgOperand(1);
3761       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3762                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3763                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3764       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3765     } else if (IsNVVM && Name == "clz.ll") {
3766       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3767       Value *Arg = CI->getArgOperand(0);
3768       Value *Ctlz = Builder.CreateCall(
3769           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3770                                     {Arg->getType()}),
3771           {Arg, Builder.getFalse()}, "ctlz");
3772       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3773     } else if (IsNVVM && Name == "popc.ll") {
3774       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3775       // i64.
3776       Value *Arg = CI->getArgOperand(0);
3777       Value *Popc = Builder.CreateCall(
3778           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3779                                     {Arg->getType()}),
3780           Arg, "ctpop");
3781       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3782     } else if (IsNVVM && Name == "h2f") {
3783       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3784                                    F->getParent(), Intrinsic::convert_from_fp16,
3785                                    {Builder.getFloatTy()}),
3786                                CI->getArgOperand(0), "h2f");
3787     } else if (IsARM) {
3788       Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
3789     } else {
3790       llvm_unreachable("Unknown function for CallBase upgrade.");
3791     }
3792 
3793     if (Rep)
3794       CI->replaceAllUsesWith(Rep);
3795     CI->eraseFromParent();
3796     return;
3797   }
3798 
3799   const auto &DefaultCase = [&]() -> void {
3800     if (CI->getFunctionType() == NewFn->getFunctionType()) {
3801       // Handle generic mangling change.
3802       assert(
3803           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3804           "Unknown function for CallBase upgrade and isn't just a name change");
3805       CI->setCalledFunction(NewFn);
3806       return;
3807     }
3808 
3809     // This must be an upgrade from a named to a literal struct.
3810     auto *OldST = cast<StructType>(CI->getType());
3811     assert(OldST != NewFn->getReturnType() && "Return type must have changed");
3812     assert(OldST->getNumElements() ==
3813                cast<StructType>(NewFn->getReturnType())->getNumElements() &&
3814            "Must have same number of elements");
3815 
3816     SmallVector<Value *> Args(CI->args());
3817     Value *NewCI = Builder.CreateCall(NewFn, Args);
3818     Value *Res = PoisonValue::get(OldST);
3819     for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
3820       Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
3821       Res = Builder.CreateInsertValue(Res, Elem, Idx);
3822     }
3823     CI->replaceAllUsesWith(Res);
3824     CI->eraseFromParent();
3825     return;
3826   };
3827   CallInst *NewCall = nullptr;
3828   switch (NewFn->getIntrinsicID()) {
3829   default: {
3830     DefaultCase();
3831     return;
3832   }
3833   case Intrinsic::arm_neon_vst1:
3834   case Intrinsic::arm_neon_vst2:
3835   case Intrinsic::arm_neon_vst3:
3836   case Intrinsic::arm_neon_vst4:
3837   case Intrinsic::arm_neon_vst2lane:
3838   case Intrinsic::arm_neon_vst3lane:
3839   case Intrinsic::arm_neon_vst4lane: {
3840     SmallVector<Value *, 4> Args(CI->args());
3841     NewCall = Builder.CreateCall(NewFn, Args);
3842     break;
3843   }
3844 
3845   case Intrinsic::arm_neon_bfdot:
3846   case Intrinsic::arm_neon_bfmmla:
3847   case Intrinsic::arm_neon_bfmlalb:
3848   case Intrinsic::arm_neon_bfmlalt:
3849   case Intrinsic::aarch64_neon_bfdot:
3850   case Intrinsic::aarch64_neon_bfmmla:
3851   case Intrinsic::aarch64_neon_bfmlalb:
3852   case Intrinsic::aarch64_neon_bfmlalt: {
3853     SmallVector<Value *, 3> Args;
3854     assert(CI->arg_size() == 3 &&
3855            "Mismatch between function args and call args");
3856     size_t OperandWidth =
3857         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
3858     assert((OperandWidth == 64 || OperandWidth == 128) &&
3859            "Unexpected operand width");
3860     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
3861     auto Iter = CI->args().begin();
3862     Args.push_back(*Iter++);
3863     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3864     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
3865     NewCall = Builder.CreateCall(NewFn, Args);
3866     break;
3867   }
3868 
3869   case Intrinsic::bitreverse:
3870     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3871     break;
3872 
3873   case Intrinsic::ctlz:
3874   case Intrinsic::cttz:
3875     assert(CI->arg_size() == 1 &&
3876            "Mismatch between function args and call args");
3877     NewCall =
3878         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3879     break;
3880 
3881   case Intrinsic::objectsize: {
3882     Value *NullIsUnknownSize =
3883         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
3884     Value *Dynamic =
3885         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3886     NewCall = Builder.CreateCall(
3887         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3888     break;
3889   }
3890 
3891   case Intrinsic::ctpop:
3892     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3893     break;
3894 
3895   case Intrinsic::convert_from_fp16:
3896     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3897     break;
3898 
3899   case Intrinsic::dbg_value:
3900     // Upgrade from the old version that had an extra offset argument.
3901     assert(CI->arg_size() == 4);
3902     // Drop nonzero offsets instead of attempting to upgrade them.
3903     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3904       if (Offset->isZeroValue()) {
3905         NewCall = Builder.CreateCall(
3906             NewFn,
3907             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3908         break;
3909       }
3910     CI->eraseFromParent();
3911     return;
3912 
3913   case Intrinsic::ptr_annotation:
3914     // Upgrade from versions that lacked the annotation attribute argument.
3915     if (CI->arg_size() != 4) {
3916       DefaultCase();
3917       return;
3918     }
3919 
3920     // Create a new call with an added null annotation attribute argument.
3921     NewCall = Builder.CreateCall(
3922         NewFn,
3923         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3924          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3925     NewCall->takeName(CI);
3926     CI->replaceAllUsesWith(NewCall);
3927     CI->eraseFromParent();
3928     return;
3929 
3930   case Intrinsic::var_annotation:
3931     // Upgrade from versions that lacked the annotation attribute argument.
3932     assert(CI->arg_size() == 4 &&
3933            "Before LLVM 12.0 this intrinsic took four arguments");
3934     // Create a new call with an added null annotation attribute argument.
3935     NewCall = Builder.CreateCall(
3936         NewFn,
3937         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
3938          CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
3939     CI->eraseFromParent();
3940     return;
3941 
3942   case Intrinsic::x86_xop_vfrcz_ss:
3943   case Intrinsic::x86_xop_vfrcz_sd:
3944     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3945     break;
3946 
3947   case Intrinsic::x86_xop_vpermil2pd:
3948   case Intrinsic::x86_xop_vpermil2ps:
3949   case Intrinsic::x86_xop_vpermil2pd_256:
3950   case Intrinsic::x86_xop_vpermil2ps_256: {
3951     SmallVector<Value *, 4> Args(CI->args());
3952     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3953     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3954     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3955     NewCall = Builder.CreateCall(NewFn, Args);
3956     break;
3957   }
3958 
3959   case Intrinsic::x86_sse41_ptestc:
3960   case Intrinsic::x86_sse41_ptestz:
3961   case Intrinsic::x86_sse41_ptestnzc: {
3962     // The arguments for these intrinsics used to be v4f32, and changed
3963     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3964     // So, the only thing required is a bitcast for both arguments.
3965     // First, check the arguments have the old type.
3966     Value *Arg0 = CI->getArgOperand(0);
3967     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
3968       return;
3969 
3970     // Old intrinsic, add bitcasts
3971     Value *Arg1 = CI->getArgOperand(1);
3972 
3973     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
3974 
3975     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3976     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3977 
3978     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3979     break;
3980   }
3981 
3982   case Intrinsic::x86_rdtscp: {
3983     // This used to take 1 arguments. If we have no arguments, it is already
3984     // upgraded.
3985     if (CI->getNumOperands() == 0)
3986       return;
3987 
3988     NewCall = Builder.CreateCall(NewFn);
3989     // Extract the second result and store it.
3990     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3991     // Cast the pointer to the right type.
3992     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3993                                  llvm::PointerType::getUnqual(Data->getType()));
3994     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3995     // Replace the original call result with the first result of the new call.
3996     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3997 
3998     NewCall->takeName(CI);
3999     CI->replaceAllUsesWith(TSC);
4000     CI->eraseFromParent();
4001     return;
4002   }
4003 
4004   case Intrinsic::x86_sse41_insertps:
4005   case Intrinsic::x86_sse41_dppd:
4006   case Intrinsic::x86_sse41_dpps:
4007   case Intrinsic::x86_sse41_mpsadbw:
4008   case Intrinsic::x86_avx_dp_ps_256:
4009   case Intrinsic::x86_avx2_mpsadbw: {
4010     // Need to truncate the last argument from i32 to i8 -- this argument models
4011     // an inherently 8-bit immediate operand to these x86 instructions.
4012     SmallVector<Value *, 4> Args(CI->args());
4013 
4014     // Replace the last argument with a trunc.
4015     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4016     NewCall = Builder.CreateCall(NewFn, Args);
4017     break;
4018   }
4019 
4020   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4021   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4022   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4023   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4024   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4025   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4026     SmallVector<Value *, 4> Args(CI->args());
4027     unsigned NumElts =
4028         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4029     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4030 
4031     NewCall = Builder.CreateCall(NewFn, Args);
4032     Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4033 
4034     NewCall->takeName(CI);
4035     CI->replaceAllUsesWith(Res);
4036     CI->eraseFromParent();
4037     return;
4038   }
4039 
4040   case Intrinsic::thread_pointer: {
4041     NewCall = Builder.CreateCall(NewFn, {});
4042     break;
4043   }
4044 
4045   case Intrinsic::invariant_start:
4046   case Intrinsic::invariant_end: {
4047     SmallVector<Value *, 4> Args(CI->args());
4048     NewCall = Builder.CreateCall(NewFn, Args);
4049     break;
4050   }
4051   case Intrinsic::masked_load:
4052   case Intrinsic::masked_store:
4053   case Intrinsic::masked_gather:
4054   case Intrinsic::masked_scatter: {
4055     SmallVector<Value *, 4> Args(CI->args());
4056     NewCall = Builder.CreateCall(NewFn, Args);
4057     NewCall->copyMetadata(*CI);
4058     break;
4059   }
4060 
4061   case Intrinsic::memcpy:
4062   case Intrinsic::memmove:
4063   case Intrinsic::memset: {
4064     // We have to make sure that the call signature is what we're expecting.
4065     // We only want to change the old signatures by removing the alignment arg:
4066     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4067     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4068     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4069     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4070     // Note: i8*'s in the above can be any pointer type
4071     if (CI->arg_size() != 5) {
4072       DefaultCase();
4073       return;
4074     }
4075     // Remove alignment argument (3), and add alignment attributes to the
4076     // dest/src pointers.
4077     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4078                       CI->getArgOperand(2), CI->getArgOperand(4)};
4079     NewCall = Builder.CreateCall(NewFn, Args);
4080     AttributeList OldAttrs = CI->getAttributes();
4081     AttributeList NewAttrs = AttributeList::get(
4082         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4083         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4084          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4085     NewCall->setAttributes(NewAttrs);
4086     auto *MemCI = cast<MemIntrinsic>(NewCall);
4087     // All mem intrinsics support dest alignment.
4088     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4089     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4090     // Memcpy/Memmove also support source alignment.
4091     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4092       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4093     break;
4094   }
4095   }
4096   assert(NewCall && "Should have either set this variable or returned through "
4097                     "the default case");
4098   NewCall->takeName(CI);
4099   CI->replaceAllUsesWith(NewCall);
4100   CI->eraseFromParent();
4101 }
4102 
4103 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4104   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4105 
4106   // Check if this function should be upgraded and get the replacement function
4107   // if there is one.
4108   Function *NewFn;
4109   if (UpgradeIntrinsicFunction(F, NewFn)) {
4110     // Replace all users of the old function with the new function or new
4111     // instructions. This is not a range loop because the call is deleted.
4112     for (User *U : make_early_inc_range(F->users()))
4113       if (CallBase *CB = dyn_cast<CallBase>(U))
4114         UpgradeIntrinsicCall(CB, NewFn);
4115 
4116     // Remove old function, no longer used, from the module.
4117     F->eraseFromParent();
4118   }
4119 }
4120 
4121 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4122   // Check if the tag uses struct-path aware TBAA format.
4123   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
4124     return &MD;
4125 
4126   auto &Context = MD.getContext();
4127   if (MD.getNumOperands() == 3) {
4128     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4129     MDNode *ScalarType = MDNode::get(Context, Elts);
4130     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4131     Metadata *Elts2[] = {ScalarType, ScalarType,
4132                          ConstantAsMetadata::get(
4133                              Constant::getNullValue(Type::getInt64Ty(Context))),
4134                          MD.getOperand(2)};
4135     return MDNode::get(Context, Elts2);
4136   }
4137   // Create a MDNode <MD, MD, offset 0>
4138   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4139                                     Type::getInt64Ty(Context)))};
4140   return MDNode::get(Context, Elts);
4141 }
4142 
4143 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4144                                       Instruction *&Temp) {
4145   if (Opc != Instruction::BitCast)
4146     return nullptr;
4147 
4148   Temp = nullptr;
4149   Type *SrcTy = V->getType();
4150   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4151       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4152     LLVMContext &Context = V->getContext();
4153 
4154     // We have no information about target data layout, so we assume that
4155     // the maximum pointer size is 64bit.
4156     Type *MidTy = Type::getInt64Ty(Context);
4157     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4158 
4159     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4160   }
4161 
4162   return nullptr;
4163 }
4164 
4165 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4166   if (Opc != Instruction::BitCast)
4167     return nullptr;
4168 
4169   Type *SrcTy = C->getType();
4170   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4171       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4172     LLVMContext &Context = C->getContext();
4173 
4174     // We have no information about target data layout, so we assume that
4175     // the maximum pointer size is 64bit.
4176     Type *MidTy = Type::getInt64Ty(Context);
4177 
4178     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4179                                      DestTy);
4180   }
4181 
4182   return nullptr;
4183 }
4184 
4185 /// Check the debug info version number, if it is out-dated, drop the debug
4186 /// info. Return true if module is modified.
4187 bool llvm::UpgradeDebugInfo(Module &M) {
4188   unsigned Version = getDebugMetadataVersionFromModule(M);
4189   if (Version == DEBUG_METADATA_VERSION) {
4190     bool BrokenDebugInfo = false;
4191     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4192       report_fatal_error("Broken module found, compilation aborted!");
4193     if (!BrokenDebugInfo)
4194       // Everything is ok.
4195       return false;
4196     else {
4197       // Diagnose malformed debug info.
4198       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4199       M.getContext().diagnose(Diag);
4200     }
4201   }
4202   bool Modified = StripDebugInfo(M);
4203   if (Modified && Version != DEBUG_METADATA_VERSION) {
4204     // Diagnose a version mismatch.
4205     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4206     M.getContext().diagnose(DiagVersion);
4207   }
4208   return Modified;
4209 }
4210 
4211 /// This checks for objc retain release marker which should be upgraded. It
4212 /// returns true if module is modified.
4213 static bool UpgradeRetainReleaseMarker(Module &M) {
4214   bool Changed = false;
4215   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4216   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4217   if (ModRetainReleaseMarker) {
4218     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4219     if (Op) {
4220       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4221       if (ID) {
4222         SmallVector<StringRef, 4> ValueComp;
4223         ID->getString().split(ValueComp, "#");
4224         if (ValueComp.size() == 2) {
4225           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4226           ID = MDString::get(M.getContext(), NewValue);
4227         }
4228         M.addModuleFlag(Module::Error, MarkerKey, ID);
4229         M.eraseNamedMetadata(ModRetainReleaseMarker);
4230         Changed = true;
4231       }
4232     }
4233   }
4234   return Changed;
4235 }
4236 
4237 void llvm::UpgradeARCRuntime(Module &M) {
4238   // This lambda converts normal function calls to ARC runtime functions to
4239   // intrinsic calls.
4240   auto UpgradeToIntrinsic = [&](const char *OldFunc,
4241                                 llvm::Intrinsic::ID IntrinsicFunc) {
4242     Function *Fn = M.getFunction(OldFunc);
4243 
4244     if (!Fn)
4245       return;
4246 
4247     Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4248 
4249     for (User *U : make_early_inc_range(Fn->users())) {
4250       CallInst *CI = dyn_cast<CallInst>(U);
4251       if (!CI || CI->getCalledFunction() != Fn)
4252         continue;
4253 
4254       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4255       FunctionType *NewFuncTy = NewFn->getFunctionType();
4256       SmallVector<Value *, 2> Args;
4257 
4258       // Don't upgrade the intrinsic if it's not valid to bitcast the return
4259       // value to the return type of the old function.
4260       if (NewFuncTy->getReturnType() != CI->getType() &&
4261           !CastInst::castIsValid(Instruction::BitCast, CI,
4262                                  NewFuncTy->getReturnType()))
4263         continue;
4264 
4265       bool InvalidCast = false;
4266 
4267       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4268         Value *Arg = CI->getArgOperand(I);
4269 
4270         // Bitcast argument to the parameter type of the new function if it's
4271         // not a variadic argument.
4272         if (I < NewFuncTy->getNumParams()) {
4273           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4274           // to the parameter type of the new function.
4275           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4276                                      NewFuncTy->getParamType(I))) {
4277             InvalidCast = true;
4278             break;
4279           }
4280           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4281         }
4282         Args.push_back(Arg);
4283       }
4284 
4285       if (InvalidCast)
4286         continue;
4287 
4288       // Create a call instruction that calls the new function.
4289       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4290       NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4291       NewCall->takeName(CI);
4292 
4293       // Bitcast the return value back to the type of the old call.
4294       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4295 
4296       if (!CI->use_empty())
4297         CI->replaceAllUsesWith(NewRetVal);
4298       CI->eraseFromParent();
4299     }
4300 
4301     if (Fn->use_empty())
4302       Fn->eraseFromParent();
4303   };
4304 
4305   // Unconditionally convert a call to "clang.arc.use" to a call to
4306   // "llvm.objc.clang.arc.use".
4307   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4308 
4309   // Upgrade the retain release marker. If there is no need to upgrade
4310   // the marker, that means either the module is already new enough to contain
4311   // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
4312   if (!UpgradeRetainReleaseMarker(M))
4313     return;
4314 
4315   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4316       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4317       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4318       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4319       {"objc_autoreleaseReturnValue",
4320        llvm::Intrinsic::objc_autoreleaseReturnValue},
4321       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4322       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4323       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4324       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4325       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4326       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4327       {"objc_release", llvm::Intrinsic::objc_release},
4328       {"objc_retain", llvm::Intrinsic::objc_retain},
4329       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4330       {"objc_retainAutoreleaseReturnValue",
4331        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4332       {"objc_retainAutoreleasedReturnValue",
4333        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4334       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4335       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4336       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4337       {"objc_unsafeClaimAutoreleasedReturnValue",
4338        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4339       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4340       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4341       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4342       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4343       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4344       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4345       {"objc_arc_annotation_topdown_bbstart",
4346        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4347       {"objc_arc_annotation_topdown_bbend",
4348        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4349       {"objc_arc_annotation_bottomup_bbstart",
4350        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4351       {"objc_arc_annotation_bottomup_bbend",
4352        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4353 
4354   for (auto &I : RuntimeFuncs)
4355     UpgradeToIntrinsic(I.first, I.second);
4356 }
4357 
4358 bool llvm::UpgradeModuleFlags(Module &M) {
4359   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4360   if (!ModFlags)
4361     return false;
4362 
4363   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4364   bool HasSwiftVersionFlag = false;
4365   uint8_t SwiftMajorVersion, SwiftMinorVersion;
4366   uint32_t SwiftABIVersion;
4367   auto Int8Ty = Type::getInt8Ty(M.getContext());
4368   auto Int32Ty = Type::getInt32Ty(M.getContext());
4369 
4370   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4371     MDNode *Op = ModFlags->getOperand(I);
4372     if (Op->getNumOperands() != 3)
4373       continue;
4374     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4375     if (!ID)
4376       continue;
4377     if (ID->getString() == "Objective-C Image Info Version")
4378       HasObjCFlag = true;
4379     if (ID->getString() == "Objective-C Class Properties")
4380       HasClassProperties = true;
4381     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4382     // field was Error and now they are Max.
4383     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4384       if (auto *Behavior =
4385               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4386         if (Behavior->getLimitedValue() == Module::Error) {
4387           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4388           Metadata *Ops[3] = {
4389               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4390               MDString::get(M.getContext(), ID->getString()),
4391               Op->getOperand(2)};
4392           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4393           Changed = true;
4394         }
4395       }
4396     }
4397 
4398     // Upgrade branch protection and return address signing module flags. The
4399     // module flag behavior for these fields were Error and now they are Min.
4400     if (ID->getString() == "branch-target-enforcement" ||
4401         ID->getString().startswith("sign-return-address")) {
4402       if (auto *Behavior =
4403               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4404         if (Behavior->getLimitedValue() == Module::Error) {
4405           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4406           Metadata *Ops[3] = {
4407               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
4408               Op->getOperand(1), Op->getOperand(2)};
4409           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4410           Changed = true;
4411         }
4412       }
4413     }
4414 
4415     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4416     // section name so that llvm-lto will not complain about mismatching
4417     // module flags that is functionally the same.
4418     if (ID->getString() == "Objective-C Image Info Section") {
4419       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4420         SmallVector<StringRef, 4> ValueComp;
4421         Value->getString().split(ValueComp, " ");
4422         if (ValueComp.size() != 1) {
4423           std::string NewValue;
4424           for (auto &S : ValueComp)
4425             NewValue += S.str();
4426           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4427                               MDString::get(M.getContext(), NewValue)};
4428           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4429           Changed = true;
4430         }
4431       }
4432     }
4433 
4434     // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
4435     // If the higher bits are set, it adds new module flag for swift info.
4436     if (ID->getString() == "Objective-C Garbage Collection") {
4437       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
4438       if (Md) {
4439         assert(Md->getValue() && "Expected non-empty metadata");
4440         auto Type = Md->getValue()->getType();
4441         if (Type == Int8Ty)
4442           continue;
4443         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
4444         if ((Val & 0xff) != Val) {
4445           HasSwiftVersionFlag = true;
4446           SwiftABIVersion = (Val & 0xff00) >> 8;
4447           SwiftMajorVersion = (Val & 0xff000000) >> 24;
4448           SwiftMinorVersion = (Val & 0xff0000) >> 16;
4449         }
4450         Metadata *Ops[3] = {
4451           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
4452           Op->getOperand(1),
4453           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
4454         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4455         Changed = true;
4456       }
4457     }
4458   }
4459 
4460   // "Objective-C Class Properties" is recently added for Objective-C. We
4461   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4462   // flag of value 0, so we can correclty downgrade this flag when trying to
4463   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4464   // this module flag.
4465   if (HasObjCFlag && !HasClassProperties) {
4466     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4467                     (uint32_t)0);
4468     Changed = true;
4469   }
4470 
4471   if (HasSwiftVersionFlag) {
4472     M.addModuleFlag(Module::Error, "Swift ABI Version",
4473                     SwiftABIVersion);
4474     M.addModuleFlag(Module::Error, "Swift Major Version",
4475                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
4476     M.addModuleFlag(Module::Error, "Swift Minor Version",
4477                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
4478     Changed = true;
4479   }
4480 
4481   return Changed;
4482 }
4483 
4484 void llvm::UpgradeSectionAttributes(Module &M) {
4485   auto TrimSpaces = [](StringRef Section) -> std::string {
4486     SmallVector<StringRef, 5> Components;
4487     Section.split(Components, ',');
4488 
4489     SmallString<32> Buffer;
4490     raw_svector_ostream OS(Buffer);
4491 
4492     for (auto Component : Components)
4493       OS << ',' << Component.trim();
4494 
4495     return std::string(OS.str().substr(1));
4496   };
4497 
4498   for (auto &GV : M.globals()) {
4499     if (!GV.hasSection())
4500       continue;
4501 
4502     StringRef Section = GV.getSection();
4503 
4504     if (!Section.startswith("__DATA, __objc_catlist"))
4505       continue;
4506 
4507     // __DATA, __objc_catlist, regular, no_dead_strip
4508     // __DATA,__objc_catlist,regular,no_dead_strip
4509     GV.setSection(TrimSpaces(Section));
4510   }
4511 }
4512 
4513 namespace {
4514 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
4515 // callsites within a function that did not also have the strictfp attribute.
4516 // Since 10.0, if strict FP semantics are needed within a function, the
4517 // function must have the strictfp attribute and all calls within the function
4518 // must also have the strictfp attribute. This latter restriction is
4519 // necessary to prevent unwanted libcall simplification when a function is
4520 // being cloned (such as for inlining).
4521 //
4522 // The "dangling" strictfp attribute usage was only used to prevent constant
4523 // folding and other libcall simplification. The nobuiltin attribute on the
4524 // callsite has the same effect.
4525 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
4526   StrictFPUpgradeVisitor() = default;
4527 
4528   void visitCallBase(CallBase &Call) {
4529     if (!Call.isStrictFP())
4530       return;
4531     if (isa<ConstrainedFPIntrinsic>(&Call))
4532       return;
4533     // If we get here, the caller doesn't have the strictfp attribute
4534     // but this callsite does. Replace the strictfp attribute with nobuiltin.
4535     Call.removeFnAttr(Attribute::StrictFP);
4536     Call.addFnAttr(Attribute::NoBuiltin);
4537   }
4538 };
4539 } // namespace
4540 
4541 void llvm::UpgradeFunctionAttributes(Function &F) {
4542   // If a function definition doesn't have the strictfp attribute,
4543   // convert any callsite strictfp attributes to nobuiltin.
4544   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
4545     StrictFPUpgradeVisitor SFPV;
4546     SFPV.visit(F);
4547   }
4548 
4549   // Remove all incompatibile attributes from function.
4550   F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
4551   for (auto &Arg : F.args())
4552     Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
4553 }
4554 
4555 static bool isOldLoopArgument(Metadata *MD) {
4556   auto *T = dyn_cast_or_null<MDTuple>(MD);
4557   if (!T)
4558     return false;
4559   if (T->getNumOperands() < 1)
4560     return false;
4561   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4562   if (!S)
4563     return false;
4564   return S->getString().startswith("llvm.vectorizer.");
4565 }
4566 
4567 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4568   StringRef OldPrefix = "llvm.vectorizer.";
4569   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4570 
4571   if (OldTag == "llvm.vectorizer.unroll")
4572     return MDString::get(C, "llvm.loop.interleave.count");
4573 
4574   return MDString::get(
4575       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4576              .str());
4577 }
4578 
4579 static Metadata *upgradeLoopArgument(Metadata *MD) {
4580   auto *T = dyn_cast_or_null<MDTuple>(MD);
4581   if (!T)
4582     return MD;
4583   if (T->getNumOperands() < 1)
4584     return MD;
4585   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4586   if (!OldTag)
4587     return MD;
4588   if (!OldTag->getString().startswith("llvm.vectorizer."))
4589     return MD;
4590 
4591   // This has an old tag.  Upgrade it.
4592   SmallVector<Metadata *, 8> Ops;
4593   Ops.reserve(T->getNumOperands());
4594   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4595   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4596     Ops.push_back(T->getOperand(I));
4597 
4598   return MDTuple::get(T->getContext(), Ops);
4599 }
4600 
4601 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4602   auto *T = dyn_cast<MDTuple>(&N);
4603   if (!T)
4604     return &N;
4605 
4606   if (none_of(T->operands(), isOldLoopArgument))
4607     return &N;
4608 
4609   SmallVector<Metadata *, 8> Ops;
4610   Ops.reserve(T->getNumOperands());
4611   for (Metadata *MD : T->operands())
4612     Ops.push_back(upgradeLoopArgument(MD));
4613 
4614   return MDTuple::get(T->getContext(), Ops);
4615 }
4616 
4617 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4618   Triple T(TT);
4619   // For AMDGPU we uprgrade older DataLayouts to include the default globals
4620   // address space of 1.
4621   if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
4622     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
4623   }
4624 
4625   std::string Res = DL.str();
4626   if (!T.isX86())
4627     return Res;
4628 
4629   // If the datalayout matches the expected format, add pointer size address
4630   // spaces to the datalayout.
4631   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4632   if (!DL.contains(AddrSpaces)) {
4633     SmallVector<StringRef, 4> Groups;
4634     Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4635     if (R.match(DL, &Groups))
4636       Res = (Groups[1] + AddrSpaces + Groups[3]).str();
4637   }
4638 
4639   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
4640   // Raising the alignment is safe because Clang did not produce f80 values in
4641   // the MSVC environment before this upgrade was added.
4642   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
4643     StringRef Ref = Res;
4644     auto I = Ref.find("-f80:32-");
4645     if (I != StringRef::npos)
4646       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
4647   }
4648 
4649   return Res;
4650 }
4651 
4652 void llvm::UpgradeAttributes(AttrBuilder &B) {
4653   StringRef FramePointer;
4654   Attribute A = B.getAttribute("no-frame-pointer-elim");
4655   if (A.isValid()) {
4656     // The value can be "true" or "false".
4657     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
4658     B.removeAttribute("no-frame-pointer-elim");
4659   }
4660   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4661     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4662     if (FramePointer != "all")
4663       FramePointer = "non-leaf";
4664     B.removeAttribute("no-frame-pointer-elim-non-leaf");
4665   }
4666   if (!FramePointer.empty())
4667     B.addAttribute("frame-pointer", FramePointer);
4668 
4669   A = B.getAttribute("null-pointer-is-valid");
4670   if (A.isValid()) {
4671     // The value can be "true" or "false".
4672     bool NullPointerIsValid = A.getValueAsString() == "true";
4673     B.removeAttribute("null-pointer-is-valid");
4674     if (NullPointerIsValid)
4675       B.addAttribute(Attribute::NullPointerIsValid);
4676   }
4677 }
4678