1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/IntrinsicsAArch64.h"
26 #include "llvm/IR/IntrinsicsARM.h"
27 #include "llvm/IR/IntrinsicsX86.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/Verifier.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/Regex.h"
33 #include <cstring>
34 using namespace llvm;
35 
36 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
37 
38 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
39 // changed their type from v4f32 to v2i64.
40 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
41                                   Function *&NewFn) {
42   // Check whether this is an old version of the function, which received
43   // v4f32 arguments.
44   Type *Arg0Type = F->getFunctionType()->getParamType(0);
45   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
46     return false;
47 
48   // Yes, it's old, replace it with new version.
49   rename(F);
50   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
51   return true;
52 }
53 
54 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
55 // arguments have changed their type from i32 to i8.
56 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
57                                              Function *&NewFn) {
58   // Check that the last argument is an i32.
59   Type *LastArgType = F->getFunctionType()->getParamType(
60      F->getFunctionType()->getNumParams() - 1);
61   if (!LastArgType->isIntegerTy(32))
62     return false;
63 
64   // Move this function aside and map down.
65   rename(F);
66   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67   return true;
68 }
69 
// Returns true when the x86 intrinsic \p Name (the "x86." prefix has already
// been stripped by the caller) is a legacy intrinsic whose call sites are
// rewritten directly during auto-upgrade. For these there is no replacement
// declaration; the caller signals this by reporting a null NewFn.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name=="ssse3.pabs.b.128" || // Added in 6.0
      Name=="ssse3.pabs.w.128" || // Added in 6.0
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || //added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
      Name.startswith("avx512.ptestm") || //Added in 6.0
      Name.startswith("avx512.ptestnm") || //Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
396 
397 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
398                                         Function *&NewFn) {
399   // Only handle intrinsics that start with "x86.".
400   if (!Name.startswith("x86."))
401     return false;
402   // Remove "x86." prefix.
403   Name = Name.substr(4);
404 
405   if (ShouldUpgradeX86Intrinsic(F, Name)) {
406     NewFn = nullptr;
407     return true;
408   }
409 
410   if (Name == "rdtscp") { // Added in 8.0
411     // If this intrinsic has 0 operands, it's the new version.
412     if (F->getFunctionType()->getNumParams() == 0)
413       return false;
414 
415     rename(F);
416     NewFn = Intrinsic::getDeclaration(F->getParent(),
417                                       Intrinsic::x86_rdtscp);
418     return true;
419   }
420 
421   // SSE4.1 ptest functions may have an old signature.
422   if (Name.startswith("sse41.ptest")) { // Added in 3.2
423     if (Name.substr(11) == "c")
424       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
425     if (Name.substr(11) == "z")
426       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
427     if (Name.substr(11) == "nzc")
428       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
429   }
430   // Several blend and other instructions with masks used the wrong number of
431   // bits.
432   if (Name == "sse41.insertps") // Added in 3.6
433     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
434                                             NewFn);
435   if (Name == "sse41.dppd") // Added in 3.6
436     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
437                                             NewFn);
438   if (Name == "sse41.dpps") // Added in 3.6
439     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
440                                             NewFn);
441   if (Name == "sse41.mpsadbw") // Added in 3.6
442     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
443                                             NewFn);
444   if (Name == "avx.dp.ps.256") // Added in 3.6
445     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
446                                             NewFn);
447   if (Name == "avx2.mpsadbw") // Added in 3.6
448     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
449                                             NewFn);
450 
451   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
452   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
453     rename(F);
454     NewFn = Intrinsic::getDeclaration(F->getParent(),
455                                       Intrinsic::x86_xop_vfrcz_ss);
456     return true;
457   }
458   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
459     rename(F);
460     NewFn = Intrinsic::getDeclaration(F->getParent(),
461                                       Intrinsic::x86_xop_vfrcz_sd);
462     return true;
463   }
464   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
465   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
466     auto Idx = F->getFunctionType()->getParamType(2);
467     if (Idx->isFPOrFPVectorTy()) {
468       rename(F);
469       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
470       unsigned EltSize = Idx->getScalarSizeInBits();
471       Intrinsic::ID Permil2ID;
472       if (EltSize == 64 && IdxSize == 128)
473         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
474       else if (EltSize == 32 && IdxSize == 128)
475         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
476       else if (EltSize == 64 && IdxSize == 256)
477         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
478       else
479         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
480       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
481       return true;
482     }
483   }
484 
485   if (Name == "seh.recoverfp") {
486     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
487     return true;
488   }
489 
490   return false;
491 }
492 
493 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
494   assert(F && "Illegal to upgrade a non-existent Function.");
495 
496   // Quickly eliminate it, if it's not a candidate.
497   StringRef Name = F->getName();
498   if (Name.size() <= 8 || !Name.startswith("llvm."))
499     return false;
500   Name = Name.substr(5); // Strip off "llvm."
501 
502   switch (Name[0]) {
503   default: break;
504   case 'a': {
505     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
506       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
507                                         F->arg_begin()->getType());
508       return true;
509     }
510     if (Name.startswith("arm.neon.vclz")) {
511       Type* args[2] = {
512         F->arg_begin()->getType(),
513         Type::getInt1Ty(F->getContext())
514       };
515       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
516       // the end of the name. Change name from llvm.arm.neon.vclz.* to
517       //  llvm.ctlz.*
518       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
519       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
520                                "llvm.ctlz." + Name.substr(14), F->getParent());
521       return true;
522     }
523     if (Name.startswith("arm.neon.vcnt")) {
524       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
525                                         F->arg_begin()->getType());
526       return true;
527     }
528     static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
529     if (vldRegex.match(Name)) {
530       auto fArgs = F->getFunctionType()->params();
531       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
532       // Can't use Intrinsic::getDeclaration here as the return types might
533       // then only be structurally equal.
534       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
535       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
536                                "llvm." + Name + ".p0i8", F->getParent());
537       return true;
538     }
539     static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
540     if (vstRegex.match(Name)) {
541       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
542                                                 Intrinsic::arm_neon_vst2,
543                                                 Intrinsic::arm_neon_vst3,
544                                                 Intrinsic::arm_neon_vst4};
545 
546       static const Intrinsic::ID StoreLaneInts[] = {
547         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
548         Intrinsic::arm_neon_vst4lane
549       };
550 
551       auto fArgs = F->getFunctionType()->params();
552       Type *Tys[] = {fArgs[0], fArgs[1]};
553       if (Name.find("lane") == StringRef::npos)
554         NewFn = Intrinsic::getDeclaration(F->getParent(),
555                                           StoreInts[fArgs.size() - 3], Tys);
556       else
557         NewFn = Intrinsic::getDeclaration(F->getParent(),
558                                           StoreLaneInts[fArgs.size() - 5], Tys);
559       return true;
560     }
561     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
562       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
563       return true;
564     }
565     if (Name.startswith("arm.neon.vqadds.")) {
566       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
567                                         F->arg_begin()->getType());
568       return true;
569     }
570     if (Name.startswith("arm.neon.vqaddu.")) {
571       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
572                                         F->arg_begin()->getType());
573       return true;
574     }
575     if (Name.startswith("arm.neon.vqsubs.")) {
576       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
577                                         F->arg_begin()->getType());
578       return true;
579     }
580     if (Name.startswith("arm.neon.vqsubu.")) {
581       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
582                                         F->arg_begin()->getType());
583       return true;
584     }
585     if (Name.startswith("aarch64.neon.addp")) {
586       if (F->arg_size() != 2)
587         break; // Invalid IR.
588       VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
589       if (Ty && Ty->getElementType()->isFloatingPointTy()) {
590         NewFn = Intrinsic::getDeclaration(F->getParent(),
591                                           Intrinsic::aarch64_neon_faddp, Ty);
592         return true;
593       }
594     }
595     break;
596   }
597 
598   case 'c': {
599     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
600       rename(F);
601       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
602                                         F->arg_begin()->getType());
603       return true;
604     }
605     if (Name.startswith("cttz.") && F->arg_size() == 1) {
606       rename(F);
607       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
608                                         F->arg_begin()->getType());
609       return true;
610     }
611     break;
612   }
613   case 'd': {
614     if (Name == "dbg.value" && F->arg_size() == 4) {
615       rename(F);
616       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
617       return true;
618     }
619     break;
620   }
621   case 'e': {
622     SmallVector<StringRef, 2> Groups;
623     static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
624     if (R.match(Name, &Groups)) {
625       Intrinsic::ID ID = Intrinsic::not_intrinsic;
626       if (Groups[1] == "fadd")
627         ID = Intrinsic::experimental_vector_reduce_v2_fadd;
628       if (Groups[1] == "fmul")
629         ID = Intrinsic::experimental_vector_reduce_v2_fmul;
630 
631       if (ID != Intrinsic::not_intrinsic) {
632         rename(F);
633         auto Args = F->getFunctionType()->params();
634         Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
635         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
636         return true;
637       }
638     }
639     break;
640   }
641   case 'i':
642   case 'l': {
643     bool IsLifetimeStart = Name.startswith("lifetime.start");
644     if (IsLifetimeStart || Name.startswith("invariant.start")) {
645       Intrinsic::ID ID = IsLifetimeStart ?
646         Intrinsic::lifetime_start : Intrinsic::invariant_start;
647       auto Args = F->getFunctionType()->params();
648       Type* ObjectPtr[1] = {Args[1]};
649       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
650         rename(F);
651         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
652         return true;
653       }
654     }
655 
656     bool IsLifetimeEnd = Name.startswith("lifetime.end");
657     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
658       Intrinsic::ID ID = IsLifetimeEnd ?
659         Intrinsic::lifetime_end : Intrinsic::invariant_end;
660 
661       auto Args = F->getFunctionType()->params();
662       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
663       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
664         rename(F);
665         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
666         return true;
667       }
668     }
669     if (Name.startswith("invariant.group.barrier")) {
670       // Rename invariant.group.barrier to launder.invariant.group
671       auto Args = F->getFunctionType()->params();
672       Type* ObjectPtr[1] = {Args[0]};
673       rename(F);
674       NewFn = Intrinsic::getDeclaration(F->getParent(),
675           Intrinsic::launder_invariant_group, ObjectPtr);
676       return true;
677 
678     }
679 
680     break;
681   }
682   case 'm': {
683     if (Name.startswith("masked.load.")) {
684       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
685       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
686         rename(F);
687         NewFn = Intrinsic::getDeclaration(F->getParent(),
688                                           Intrinsic::masked_load,
689                                           Tys);
690         return true;
691       }
692     }
693     if (Name.startswith("masked.store.")) {
694       auto Args = F->getFunctionType()->params();
695       Type *Tys[] = { Args[0], Args[1] };
696       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
697         rename(F);
698         NewFn = Intrinsic::getDeclaration(F->getParent(),
699                                           Intrinsic::masked_store,
700                                           Tys);
701         return true;
702       }
703     }
704     // Renaming gather/scatter intrinsics with no address space overloading
705     // to the new overload which includes an address space
706     if (Name.startswith("masked.gather.")) {
707       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
708       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
709         rename(F);
710         NewFn = Intrinsic::getDeclaration(F->getParent(),
711                                           Intrinsic::masked_gather, Tys);
712         return true;
713       }
714     }
715     if (Name.startswith("masked.scatter.")) {
716       auto Args = F->getFunctionType()->params();
717       Type *Tys[] = {Args[0], Args[1]};
718       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
719         rename(F);
720         NewFn = Intrinsic::getDeclaration(F->getParent(),
721                                           Intrinsic::masked_scatter, Tys);
722         return true;
723       }
724     }
725     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
726     // alignment parameter to embedding the alignment as an attribute of
727     // the pointer args.
728     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
729       rename(F);
730       // Get the types of dest, src, and len
731       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
732       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
733                                         ParamTypes);
734       return true;
735     }
736     if (Name.startswith("memmove.") && F->arg_size() == 5) {
737       rename(F);
738       // Get the types of dest, src, and len
739       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
740       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
741                                         ParamTypes);
742       return true;
743     }
744     if (Name.startswith("memset.") && F->arg_size() == 5) {
745       rename(F);
746       // Get the types of dest, and len
747       const auto *FT = F->getFunctionType();
748       Type *ParamTypes[2] = {
749           FT->getParamType(0), // Dest
750           FT->getParamType(2)  // len
751       };
752       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
753                                         ParamTypes);
754       return true;
755     }
756     break;
757   }
758   case 'n': {
759     if (Name.startswith("nvvm.")) {
760       Name = Name.substr(5);
761 
762       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
763       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
764                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
765                               .Case("clz.i", Intrinsic::ctlz)
766                               .Case("popc.i", Intrinsic::ctpop)
767                               .Default(Intrinsic::not_intrinsic);
768       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
769         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
770                                           {F->getReturnType()});
771         return true;
772       }
773 
774       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
775       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
776       //
777       // TODO: We could add lohi.i2d.
778       bool Expand = StringSwitch<bool>(Name)
779                         .Cases("abs.i", "abs.ll", true)
780                         .Cases("clz.ll", "popc.ll", "h2f", true)
781                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
782                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
783                         .StartsWith("atomic.load.add.f32.p", true)
784                         .StartsWith("atomic.load.add.f64.p", true)
785                         .Default(false);
786       if (Expand) {
787         NewFn = nullptr;
788         return true;
789       }
790     }
791     break;
792   }
793   case 'o':
794     // We only need to change the name to match the mangling including the
795     // address space.
796     if (Name.startswith("objectsize.")) {
797       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
798       if (F->arg_size() == 2 || F->arg_size() == 3 ||
799           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
800         rename(F);
801         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
802                                           Tys);
803         return true;
804       }
805     }
806     break;
807 
808   case 'p':
809     if (Name == "prefetch") {
810       // Handle address space overloading.
811       Type *Tys[] = {F->arg_begin()->getType()};
812       if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
813         rename(F);
814         NewFn =
815             Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
816         return true;
817       }
818     }
819     break;
820 
821   case 's':
822     if (Name == "stackprotectorcheck") {
823       NewFn = nullptr;
824       return true;
825     }
826     break;
827 
828   case 'x':
829     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
830       return true;
831   }
832   // Remangle our intrinsic since we upgrade the mangling
833   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
834   if (Result != None) {
835     NewFn = Result.getValue();
836     return true;
837   }
838 
839   //  This may not belong here. This function is effectively being overloaded
840   //  to both detect an intrinsic which needs upgrading, and to provide the
841   //  upgraded form of the intrinsic. We should perhaps have two separate
842   //  functions for this.
843   return false;
844 }
845 
846 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
847   NewFn = nullptr;
848   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
849   assert(F != NewFn && "Intrinsic function upgraded to the same function");
850 
851   // Upgrade intrinsic attributes.  This does not change the function.
852   if (NewFn)
853     F = NewFn;
854   if (Intrinsic::ID id = F->getIntrinsicID())
855     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
856   return Upgraded;
857 }
858 
859 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
860   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
861                           GV->getName() == "llvm.global_dtors")) ||
862       !GV->hasInitializer())
863     return nullptr;
864   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
865   if (!ATy)
866     return nullptr;
867   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
868   if (!STy || STy->getNumElements() != 2)
869     return nullptr;
870 
871   LLVMContext &C = GV->getContext();
872   IRBuilder<> IRB(C);
873   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
874                                IRB.getInt8PtrTy());
875   Constant *Init = GV->getInitializer();
876   unsigned N = Init->getNumOperands();
877   std::vector<Constant *> NewCtors(N);
878   for (unsigned i = 0; i != N; ++i) {
879     auto Ctor = cast<Constant>(Init->getOperand(i));
880     NewCtors[i] = ConstantStruct::get(
881         EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
882         Constant::getNullValue(IRB.getInt8PtrTy()));
883   }
884   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
885 
886   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
887                             NewInit, GV->getName());
888 }
889 
890 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
891 // to byte shuffles.
892 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
893                                          Value *Op, unsigned Shift) {
894   Type *ResultTy = Op->getType();
895   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
896 
897   // Bitcast from a 64-bit element type to a byte element type.
898   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
899   Op = Builder.CreateBitCast(Op, VecTy, "cast");
900 
901   // We'll be shuffling in zeroes.
902   Value *Res = Constant::getNullValue(VecTy);
903 
904   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
905   // we'll just return the zero vector.
906   if (Shift < 16) {
907     uint32_t Idxs[64];
908     // 256/512-bit version is split into 2/4 16-byte lanes.
909     for (unsigned l = 0; l != NumElts; l += 16)
910       for (unsigned i = 0; i != 16; ++i) {
911         unsigned Idx = NumElts + i - Shift;
912         if (Idx < NumElts)
913           Idx -= NumElts - 16; // end of lane, switch operand.
914         Idxs[l + i] = Idx + l;
915       }
916 
917     Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
918   }
919 
920   // Bitcast back to a 64-bit element type.
921   return Builder.CreateBitCast(Res, ResultTy, "cast");
922 }
923 
924 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
925 // to byte shuffles.
926 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
927                                          unsigned Shift) {
928   Type *ResultTy = Op->getType();
929   unsigned NumElts = ResultTy->getVectorNumElements() * 8;
930 
931   // Bitcast from a 64-bit element type to a byte element type.
932   Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
933   Op = Builder.CreateBitCast(Op, VecTy, "cast");
934 
935   // We'll be shuffling in zeroes.
936   Value *Res = Constant::getNullValue(VecTy);
937 
938   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
939   // we'll just return the zero vector.
940   if (Shift < 16) {
941     uint32_t Idxs[64];
942     // 256/512-bit version is split into 2/4 16-byte lanes.
943     for (unsigned l = 0; l != NumElts; l += 16)
944       for (unsigned i = 0; i != 16; ++i) {
945         unsigned Idx = i + Shift;
946         if (Idx >= 16)
947           Idx += NumElts - 16; // end of lane, switch operand.
948         Idxs[l + i] = Idx + l;
949       }
950 
951     Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
952   }
953 
954   // Bitcast back to a 64-bit element type.
955   return Builder.CreateBitCast(Res, ResultTy, "cast");
956 }
957 
958 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
959                             unsigned NumElts) {
960   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
961                              cast<IntegerType>(Mask->getType())->getBitWidth());
962   Mask = Builder.CreateBitCast(Mask, MaskTy);
963 
964   // If we have less than 8 elements, then the starting mask was an i8 and
965   // we need to extract down to the right number of elements.
966   if (NumElts < 8) {
967     uint32_t Indices[4];
968     for (unsigned i = 0; i != NumElts; ++i)
969       Indices[i] = i;
970     Mask = Builder.CreateShuffleVector(Mask, Mask,
971                                        makeArrayRef(Indices, NumElts),
972                                        "extract");
973   }
974 
975   return Mask;
976 }
977 
978 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
979                             Value *Op0, Value *Op1) {
980   // If the mask is all ones just emit the first operation.
981   if (const auto *C = dyn_cast<Constant>(Mask))
982     if (C->isAllOnesValue())
983       return Op0;
984 
985   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
986   return Builder.CreateSelect(Mask, Op0, Op1);
987 }
988 
989 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
990                                   Value *Op0, Value *Op1) {
991   // If the mask is all ones just emit the first operation.
992   if (const auto *C = dyn_cast<Constant>(Mask))
993     if (C->isAllOnesValue())
994       return Op0;
995 
996   llvm::VectorType *MaskTy =
997     llvm::VectorType::get(Builder.getInt1Ty(),
998                           Mask->getType()->getIntegerBitWidth());
999   Mask = Builder.CreateBitCast(Mask, MaskTy);
1000   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1001   return Builder.CreateSelect(Mask, Op0, Op1);
1002 }
1003 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Expands the concatenate-and-shift as a byte/element shuffle of (Op1, Op0),
// then applies the writemask against Passthru.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount operand is always an immediate constant.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN (hardware wraps it modulo NumElts).
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // Shuffle over (Op1, Op0): indices < NumElts read the low source (Op1).
  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Apply the writemask (if any); unselected lanes come from Passthru.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
1052 
// Upgrade masked vpermt2var/vpermi2var intrinsics to the unified unmasked
// vpermi2var intrinsics followed by an explicit masked select.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // Pick the vpermi2var intrinsic matching the call's vector width, element
  // width and int/fp element type.
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  // Zero-masked forms blend with zero; otherwise masked-off lanes take
  // operand 1 (bitcast to the result type, since it may be the index vector).
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  // Operand 3 is the writemask.
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1113 
1114 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1115                                             bool IsSigned, bool IsAddition) {
1116   Type *Ty = CI.getType();
1117   Value *Op0 = CI.getOperand(0);
1118   Value *Op1 = CI.getOperand(1);
1119 
1120   Intrinsic::ID IID =
1121       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1122                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1123   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1124   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1125 
1126   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1127     Value *VecSrc = CI.getOperand(2);
1128     Value *Mask = CI.getOperand(3);
1129     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1130   }
1131   return Res;
1132 }
1133 
1134 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1135                                bool IsRotateRight) {
1136   Type *Ty = CI.getType();
1137   Value *Src = CI.getArgOperand(0);
1138   Value *Amt = CI.getArgOperand(1);
1139 
1140   // Amount may be scalar immediate, in which case create a splat vector.
1141   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1142   // we only care about the lowest log2 bits anyway.
1143   if (Amt->getType() != Ty) {
1144     unsigned NumElts = Ty->getVectorNumElements();
1145     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1146     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1147   }
1148 
1149   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1150   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1151   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1152 
1153   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1154     Value *VecSrc = CI.getOperand(2);
1155     Value *Mask = CI.getOperand(3);
1156     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1157   }
1158   return Res;
1159 }
1160 
1161 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1162                               bool IsSigned) {
1163   Type *Ty = CI.getType();
1164   Value *LHS = CI.getArgOperand(0);
1165   Value *RHS = CI.getArgOperand(1);
1166 
1167   CmpInst::Predicate Pred;
1168   switch (Imm) {
1169   case 0x0:
1170     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1171     break;
1172   case 0x1:
1173     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1174     break;
1175   case 0x2:
1176     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1177     break;
1178   case 0x3:
1179     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1180     break;
1181   case 0x4:
1182     Pred = ICmpInst::ICMP_EQ;
1183     break;
1184   case 0x5:
1185     Pred = ICmpInst::ICMP_NE;
1186     break;
1187   case 0x6:
1188     return Constant::getNullValue(Ty); // FALSE
1189   case 0x7:
1190     return Constant::getAllOnesValue(Ty); // TRUE
1191   default:
1192     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1193   }
1194 
1195   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1196   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1197   return Ext;
1198 }
1199 
1200 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1201                                     bool IsShiftRight, bool ZeroMask) {
1202   Type *Ty = CI.getType();
1203   Value *Op0 = CI.getArgOperand(0);
1204   Value *Op1 = CI.getArgOperand(1);
1205   Value *Amt = CI.getArgOperand(2);
1206 
1207   if (IsShiftRight)
1208     std::swap(Op0, Op1);
1209 
1210   // Amount may be scalar immediate, in which case create a splat vector.
1211   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1212   // we only care about the lowest log2 bits anyway.
1213   if (Amt->getType() != Ty) {
1214     unsigned NumElts = Ty->getVectorNumElements();
1215     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1216     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1217   }
1218 
1219   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1220   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1221   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1222 
1223   unsigned NumArgs = CI.getNumArgOperands();
1224   if (NumArgs >= 4) { // For masked intrinsics.
1225     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1226                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1227                                    CI.getArgOperand(0);
1228     Value *Mask = CI.getOperand(NumArgs - 1);
1229     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1230   }
1231   return Res;
1232 }
1233 
1234 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1235                                  Value *Ptr, Value *Data, Value *Mask,
1236                                  bool Aligned) {
1237   // Cast the pointer to the right type.
1238   Ptr = Builder.CreateBitCast(Ptr,
1239                               llvm::PointerType::getUnqual(Data->getType()));
1240   const Align Alignment =
1241       Aligned ? Align(cast<VectorType>(Data->getType())->getBitWidth() / 8)
1242               : Align(1);
1243 
1244   // If the mask is all ones just emit a regular store.
1245   if (const auto *C = dyn_cast<Constant>(Mask))
1246     if (C->isAllOnesValue())
1247       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1248 
1249   // Convert the mask from an integer type to a vector of i1.
1250   unsigned NumElts = Data->getType()->getVectorNumElements();
1251   Mask = getX86MaskVec(Builder, Mask, NumElts);
1252   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1253 }
1254 
1255 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
1256                                 Value *Ptr, Value *Passthru, Value *Mask,
1257                                 bool Aligned) {
1258   Type *ValTy = Passthru->getType();
1259   // Cast the pointer to the right type.
1260   Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1261   const Align Alignment =
1262       Aligned ? Align(cast<VectorType>(Passthru->getType())->getBitWidth() / 8)
1263               : Align(1);
1264 
1265   // If the mask is all ones just emit a regular store.
1266   if (const auto *C = dyn_cast<Constant>(Mask))
1267     if (C->isAllOnesValue())
1268       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1269 
1270   // Convert the mask from an integer type to a vector of i1.
1271   unsigned NumElts = Passthru->getType()->getVectorNumElements();
1272   Mask = getX86MaskVec(Builder, Mask, NumElts);
1273   return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
1274 }
1275 
1276 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1277   Value *Op0 = CI.getArgOperand(0);
1278   llvm::Type *Ty = Op0->getType();
1279   Value *Zero = llvm::Constant::getNullValue(Ty);
1280   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1281   Value *Neg = Builder.CreateNeg(Op0);
1282   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1283 
1284   if (CI.getNumArgOperands() == 3)
1285     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1286 
1287   return Res;
1288 }
1289 
1290 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1291                                ICmpInst::Predicate Pred) {
1292   Value *Op0 = CI.getArgOperand(0);
1293   Value *Op1 = CI.getArgOperand(1);
1294   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1295   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1296 
1297   if (CI.getNumArgOperands() == 4)
1298     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1299 
1300   return Res;
1301 }
1302 
1303 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1304   Type *Ty = CI.getType();
1305 
1306   // Arguments have a vXi32 type so cast to vXi64.
1307   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1308   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1309 
1310   if (IsSigned) {
1311     // Shift left then arithmetic shift right.
1312     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1313     LHS = Builder.CreateShl(LHS, ShiftAmt);
1314     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1315     RHS = Builder.CreateShl(RHS, ShiftAmt);
1316     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1317   } else {
1318     // Clear the upper bits.
1319     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1320     LHS = Builder.CreateAnd(LHS, Mask);
1321     RHS = Builder.CreateAnd(RHS, Mask);
1322   }
1323 
1324   Value *Res = Builder.CreateMul(LHS, RHS);
1325 
1326   if (CI.getNumArgOperands() == 4)
1327     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1328 
1329   return Res;
1330 }
1331 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Returns the i1 vector AND'd with the writemask, bitcast to an integer of at
// least 8 bits (the narrowest X86 mask register granule).
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    // Skip the AND when the mask is a constant all-ones, i.e. no masking.
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    // Pad out to 8 elements by shuffling in zeroes (indices >= NumElts read
    // from the null second operand) so the bitcast below yields an i8.
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Reinterpret the i1 vector as a scalar mask integer, at least 8 bits wide.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1354 
1355 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
1356                                    unsigned CC, bool Signed) {
1357   Value *Op0 = CI.getArgOperand(0);
1358   unsigned NumElts = Op0->getType()->getVectorNumElements();
1359 
1360   Value *Cmp;
1361   if (CC == 3) {
1362     Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1363   } else if (CC == 7) {
1364     Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1365   } else {
1366     ICmpInst::Predicate Pred;
1367     switch (CC) {
1368     default: llvm_unreachable("Unknown condition code");
1369     case 0: Pred = ICmpInst::ICMP_EQ;  break;
1370     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1371     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1372     case 4: Pred = ICmpInst::ICMP_NE;  break;
1373     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1374     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1375     }
1376     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1377   }
1378 
1379   Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1380 
1381   return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1382 }
1383 
1384 // Replace a masked intrinsic with an older unmasked intrinsic.
1385 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1386                                     Intrinsic::ID IID) {
1387   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1388   Value *Rep = Builder.CreateCall(Intrin,
1389                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1390   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1391 }
1392 
1393 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1394   Value* A = CI.getArgOperand(0);
1395   Value* B = CI.getArgOperand(1);
1396   Value* Src = CI.getArgOperand(2);
1397   Value* Mask = CI.getArgOperand(3);
1398 
1399   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1400   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1401   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1402   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1403   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1404   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1405 }
1406 
1407 
1408 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1409   Value* Op = CI.getArgOperand(0);
1410   Type* ReturnOp = CI.getType();
1411   unsigned NumElts = CI.getType()->getVectorNumElements();
1412   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1413   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1414 }
1415 
// Replace intrinsic with unmasked version and a select.
//
// Handles "llvm.x86.avx512.mask.*" intrinsics whose masked semantics can be
// expressed as the corresponding unmasked intrinsic followed by a vector
// select between the result and the passthru operand. \p Name must still
// carry the "avx512.mask." prefix on entry. On a match, \p Rep is set to the
// replacement value and true is returned; otherwise returns false and leaves
// \p Rep untouched.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  // The vector width and element width of the result pick the concrete
  // unmasked intrinsic for a given operation name.
  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar has both float and integer result variants at the same
    // vector/element widths, so the result type's FP-ness disambiguates.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the character after "conflict.": 'd' or 'q' element kind.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    // Name[5] is the character after "pavg.": 'b' or 'w' element kind.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with the original operands minus the trailing
  // (passthru, mask) pair...
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  // ...then blend the result with the passthru (second-to-last operand)
  // under the mask (last operand).
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1655 
1656 /// Upgrade comment in call to inline asm that represents an objc retain release
1657 /// marker.
1658 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1659   size_t Pos;
1660   if (AsmStr->find("mov\tfp") == 0 &&
1661       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1662       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1663     AsmStr->replace(Pos, 1, ";");
1664   }
1665   return;
1666 }
1667 
1668 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1669 /// provided to seamlessly integrate with existing context.
1670 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1671   Function *F = CI->getCalledFunction();
1672   LLVMContext &C = CI->getContext();
1673   IRBuilder<> Builder(C);
1674   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1675 
1676   assert(F && "Intrinsic call is not direct?");
1677 
1678   if (!NewFn) {
1679     // Get the Function's name.
1680     StringRef Name = F->getName();
1681 
1682     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1683     Name = Name.substr(5);
1684 
1685     bool IsX86 = Name.startswith("x86.");
1686     if (IsX86)
1687       Name = Name.substr(4);
1688     bool IsNVVM = Name.startswith("nvvm.");
1689     if (IsNVVM)
1690       Name = Name.substr(5);
1691 
1692     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1693       Module *M = F->getParent();
1694       SmallVector<Metadata *, 1> Elts;
1695       Elts.push_back(
1696           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1697       MDNode *Node = MDNode::get(C, Elts);
1698 
1699       Value *Arg0 = CI->getArgOperand(0);
1700       Value *Arg1 = CI->getArgOperand(1);
1701 
1702       // Nontemporal (unaligned) store of the 0'th element of the float/double
1703       // vector.
1704       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1705       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1706       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1707       Value *Extract =
1708           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1709 
1710       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
1711       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1712 
1713       // Remove intrinsic.
1714       CI->eraseFromParent();
1715       return;
1716     }
1717 
1718     if (IsX86 && (Name.startswith("avx.movnt.") ||
1719                   Name.startswith("avx512.storent."))) {
1720       Module *M = F->getParent();
1721       SmallVector<Metadata *, 1> Elts;
1722       Elts.push_back(
1723           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1724       MDNode *Node = MDNode::get(C, Elts);
1725 
1726       Value *Arg0 = CI->getArgOperand(0);
1727       Value *Arg1 = CI->getArgOperand(1);
1728 
1729       // Convert the type of the pointer to a pointer to the stored type.
1730       Value *BC = Builder.CreateBitCast(Arg0,
1731                                         PointerType::getUnqual(Arg1->getType()),
1732                                         "cast");
1733       VectorType *VTy = cast<VectorType>(Arg1->getType());
1734       StoreInst *SI =
1735           Builder.CreateAlignedStore(Arg1, BC, Align(VTy->getBitWidth() / 8));
1736       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1737 
1738       // Remove intrinsic.
1739       CI->eraseFromParent();
1740       return;
1741     }
1742 
1743     if (IsX86 && Name == "sse2.storel.dq") {
1744       Value *Arg0 = CI->getArgOperand(0);
1745       Value *Arg1 = CI->getArgOperand(1);
1746 
1747       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1748       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1749       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1750       Value *BC = Builder.CreateBitCast(Arg0,
1751                                         PointerType::getUnqual(Elt->getType()),
1752                                         "cast");
1753       Builder.CreateAlignedStore(Elt, BC, Align(1));
1754 
1755       // Remove intrinsic.
1756       CI->eraseFromParent();
1757       return;
1758     }
1759 
1760     if (IsX86 && (Name.startswith("sse.storeu.") ||
1761                   Name.startswith("sse2.storeu.") ||
1762                   Name.startswith("avx.storeu."))) {
1763       Value *Arg0 = CI->getArgOperand(0);
1764       Value *Arg1 = CI->getArgOperand(1);
1765 
1766       Arg0 = Builder.CreateBitCast(Arg0,
1767                                    PointerType::getUnqual(Arg1->getType()),
1768                                    "cast");
1769       Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
1770 
1771       // Remove intrinsic.
1772       CI->eraseFromParent();
1773       return;
1774     }
1775 
1776     if (IsX86 && Name == "avx512.mask.store.ss") {
1777       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1778       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1779                          Mask, false);
1780 
1781       // Remove intrinsic.
1782       CI->eraseFromParent();
1783       return;
1784     }
1785 
1786     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1787       // "avx512.mask.storeu." or "avx512.mask.store."
1788       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1789       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1790                          CI->getArgOperand(2), Aligned);
1791 
1792       // Remove intrinsic.
1793       CI->eraseFromParent();
1794       return;
1795     }
1796 
1797     Value *Rep;
1798     // Upgrade packed integer vector compare intrinsics to compare instructions.
1799     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1800                   Name.startswith("avx2.pcmp"))) {
1801       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1802       bool CmpEq = Name[9] == 'e';
1803       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1804                                CI->getArgOperand(0), CI->getArgOperand(1));
1805       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1806     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1807       Type *ExtTy = Type::getInt32Ty(C);
1808       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1809         ExtTy = Type::getInt64Ty(C);
1810       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1811                          ExtTy->getPrimitiveSizeInBits();
1812       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1813       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1814     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1815                          Name == "sse2.sqrt.sd")) {
1816       Value *Vec = CI->getArgOperand(0);
1817       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1818       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1819                                                  Intrinsic::sqrt, Elt0->getType());
1820       Elt0 = Builder.CreateCall(Intr, Elt0);
1821       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1822     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1823                          Name.startswith("sse2.sqrt.p") ||
1824                          Name.startswith("sse.sqrt.p"))) {
1825       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1826                                                          Intrinsic::sqrt,
1827                                                          CI->getType()),
1828                                {CI->getArgOperand(0)});
1829     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1830       if (CI->getNumArgOperands() == 4 &&
1831           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1832            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1833         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1834                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1835 
1836         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1837         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1838                                                            IID), Args);
1839       } else {
1840         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1841                                                            Intrinsic::sqrt,
1842                                                            CI->getType()),
1843                                  {CI->getArgOperand(0)});
1844       }
1845       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1846                           CI->getArgOperand(1));
1847     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1848                          Name.startswith("avx512.ptestnm"))) {
1849       Value *Op0 = CI->getArgOperand(0);
1850       Value *Op1 = CI->getArgOperand(1);
1851       Value *Mask = CI->getArgOperand(2);
1852       Rep = Builder.CreateAnd(Op0, Op1);
1853       llvm::Type *Ty = Op0->getType();
1854       Value *Zero = llvm::Constant::getNullValue(Ty);
1855       ICmpInst::Predicate Pred =
1856         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1857       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1858       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1859     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1860       unsigned NumElts =
1861           CI->getArgOperand(1)->getType()->getVectorNumElements();
1862       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1863       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1864                           CI->getArgOperand(1));
1865     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1866       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1867       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1868       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1869       uint32_t Indices[64];
1870       for (unsigned i = 0; i != NumElts; ++i)
1871         Indices[i] = i;
1872 
1873       // First extract half of each vector. This gives better codegen than
1874       // doing it in a single shuffle.
1875       LHS = Builder.CreateShuffleVector(LHS, LHS,
1876                                         makeArrayRef(Indices, NumElts / 2));
1877       RHS = Builder.CreateShuffleVector(RHS, RHS,
1878                                         makeArrayRef(Indices, NumElts / 2));
1879       // Concat the vectors.
1880       // NOTE: Operands have to be swapped to match intrinsic definition.
1881       Rep = Builder.CreateShuffleVector(RHS, LHS,
1882                                         makeArrayRef(Indices, NumElts));
1883       Rep = Builder.CreateBitCast(Rep, CI->getType());
1884     } else if (IsX86 && Name == "avx512.kand.w") {
1885       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1886       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1887       Rep = Builder.CreateAnd(LHS, RHS);
1888       Rep = Builder.CreateBitCast(Rep, CI->getType());
1889     } else if (IsX86 && Name == "avx512.kandn.w") {
1890       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1891       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1892       LHS = Builder.CreateNot(LHS);
1893       Rep = Builder.CreateAnd(LHS, RHS);
1894       Rep = Builder.CreateBitCast(Rep, CI->getType());
1895     } else if (IsX86 && Name == "avx512.kor.w") {
1896       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1897       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1898       Rep = Builder.CreateOr(LHS, RHS);
1899       Rep = Builder.CreateBitCast(Rep, CI->getType());
1900     } else if (IsX86 && Name == "avx512.kxor.w") {
1901       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1902       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1903       Rep = Builder.CreateXor(LHS, RHS);
1904       Rep = Builder.CreateBitCast(Rep, CI->getType());
1905     } else if (IsX86 && Name == "avx512.kxnor.w") {
1906       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1907       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1908       LHS = Builder.CreateNot(LHS);
1909       Rep = Builder.CreateXor(LHS, RHS);
1910       Rep = Builder.CreateBitCast(Rep, CI->getType());
1911     } else if (IsX86 && Name == "avx512.knot.w") {
1912       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1913       Rep = Builder.CreateNot(Rep);
1914       Rep = Builder.CreateBitCast(Rep, CI->getType());
1915     } else if (IsX86 &&
1916                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1917       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1918       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1919       Rep = Builder.CreateOr(LHS, RHS);
1920       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1921       Value *C;
1922       if (Name[14] == 'c')
1923         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1924       else
1925         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1926       Rep = Builder.CreateICmpEQ(Rep, C);
1927       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1928     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1929                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1930                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1931                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1932       Type *I32Ty = Type::getInt32Ty(C);
1933       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1934                                                  ConstantInt::get(I32Ty, 0));
1935       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1936                                                  ConstantInt::get(I32Ty, 0));
1937       Value *EltOp;
1938       if (Name.contains(".add."))
1939         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1940       else if (Name.contains(".sub."))
1941         EltOp = Builder.CreateFSub(Elt0, Elt1);
1942       else if (Name.contains(".mul."))
1943         EltOp = Builder.CreateFMul(Elt0, Elt1);
1944       else
1945         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1946       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1947                                         ConstantInt::get(I32Ty, 0));
1948     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1949       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1950       bool CmpEq = Name[16] == 'e';
1951       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1952     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1953       Type *OpTy = CI->getArgOperand(0)->getType();
1954       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1955       Intrinsic::ID IID;
1956       switch (VecWidth) {
1957       default: llvm_unreachable("Unexpected intrinsic");
1958       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1959       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1960       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1961       }
1962 
1963       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1964                                { CI->getOperand(0), CI->getArgOperand(1) });
1965       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1966     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1967       Type *OpTy = CI->getArgOperand(0)->getType();
1968       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1969       unsigned EltWidth = OpTy->getScalarSizeInBits();
1970       Intrinsic::ID IID;
1971       if (VecWidth == 128 && EltWidth == 32)
1972         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1973       else if (VecWidth == 256 && EltWidth == 32)
1974         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1975       else if (VecWidth == 512 && EltWidth == 32)
1976         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1977       else if (VecWidth == 128 && EltWidth == 64)
1978         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1979       else if (VecWidth == 256 && EltWidth == 64)
1980         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1981       else if (VecWidth == 512 && EltWidth == 64)
1982         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1983       else
1984         llvm_unreachable("Unexpected intrinsic");
1985 
1986       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1987                                { CI->getOperand(0), CI->getArgOperand(1) });
1988       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1989     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1990       Type *OpTy = CI->getArgOperand(0)->getType();
1991       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1992       unsigned EltWidth = OpTy->getScalarSizeInBits();
1993       Intrinsic::ID IID;
1994       if (VecWidth == 128 && EltWidth == 32)
1995         IID = Intrinsic::x86_avx512_cmp_ps_128;
1996       else if (VecWidth == 256 && EltWidth == 32)
1997         IID = Intrinsic::x86_avx512_cmp_ps_256;
1998       else if (VecWidth == 512 && EltWidth == 32)
1999         IID = Intrinsic::x86_avx512_cmp_ps_512;
2000       else if (VecWidth == 128 && EltWidth == 64)
2001         IID = Intrinsic::x86_avx512_cmp_pd_128;
2002       else if (VecWidth == 256 && EltWidth == 64)
2003         IID = Intrinsic::x86_avx512_cmp_pd_256;
2004       else if (VecWidth == 512 && EltWidth == 64)
2005         IID = Intrinsic::x86_avx512_cmp_pd_512;
2006       else
2007         llvm_unreachable("Unexpected intrinsic");
2008 
2009       SmallVector<Value *, 4> Args;
2010       Args.push_back(CI->getArgOperand(0));
2011       Args.push_back(CI->getArgOperand(1));
2012       Args.push_back(CI->getArgOperand(2));
2013       if (CI->getNumArgOperands() == 5)
2014         Args.push_back(CI->getArgOperand(4));
2015 
2016       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2017                                Args);
2018       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
2019     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
2020                Name[16] != 'p') {
2021       // Integer compare intrinsics.
2022       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2023       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2024     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
2025       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2026       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2027     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
2028                          Name.startswith("avx512.cvtw2mask.") ||
2029                          Name.startswith("avx512.cvtd2mask.") ||
2030                          Name.startswith("avx512.cvtq2mask."))) {
2031       Value *Op = CI->getArgOperand(0);
2032       Value *Zero = llvm::Constant::getNullValue(Op->getType());
2033       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2034       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2035     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2036                         Name == "ssse3.pabs.w.128" ||
2037                         Name == "ssse3.pabs.d.128" ||
2038                         Name.startswith("avx2.pabs") ||
2039                         Name.startswith("avx512.mask.pabs"))) {
2040       Rep = upgradeAbs(Builder, *CI);
2041     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2042                          Name == "sse2.pmaxs.w" ||
2043                          Name == "sse41.pmaxsd" ||
2044                          Name.startswith("avx2.pmaxs") ||
2045                          Name.startswith("avx512.mask.pmaxs"))) {
2046       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2047     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2048                          Name == "sse41.pmaxuw" ||
2049                          Name == "sse41.pmaxud" ||
2050                          Name.startswith("avx2.pmaxu") ||
2051                          Name.startswith("avx512.mask.pmaxu"))) {
2052       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2053     } else if (IsX86 && (Name == "sse41.pminsb" ||
2054                          Name == "sse2.pmins.w" ||
2055                          Name == "sse41.pminsd" ||
2056                          Name.startswith("avx2.pmins") ||
2057                          Name.startswith("avx512.mask.pmins"))) {
2058       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2059     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2060                          Name == "sse41.pminuw" ||
2061                          Name == "sse41.pminud" ||
2062                          Name.startswith("avx2.pminu") ||
2063                          Name.startswith("avx512.mask.pminu"))) {
2064       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2065     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2066                          Name == "avx2.pmulu.dq" ||
2067                          Name == "avx512.pmulu.dq.512" ||
2068                          Name.startswith("avx512.mask.pmulu.dq."))) {
2069       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2070     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2071                          Name == "avx2.pmul.dq" ||
2072                          Name == "avx512.pmul.dq.512" ||
2073                          Name.startswith("avx512.mask.pmul.dq."))) {
2074       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2075     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2076                          Name == "sse2.cvtsi2sd" ||
2077                          Name == "sse.cvtsi642ss" ||
2078                          Name == "sse2.cvtsi642sd")) {
2079       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
2080                                  CI->getType()->getVectorElementType());
2081       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2082     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2083       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2084                                  CI->getType()->getVectorElementType());
2085       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2086     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2087       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2088       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2089       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2090     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2091                          Name == "sse2.cvtdq2ps" ||
2092                          Name == "avx.cvtdq2.pd.256" ||
2093                          Name == "avx.cvtdq2.ps.256" ||
2094                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2095                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2096                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2097                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2098                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2099                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2100                          Name == "avx512.mask.cvtqq2ps.256" ||
2101                          Name == "avx512.mask.cvtqq2ps.512" ||
2102                          Name == "avx512.mask.cvtuqq2ps.256" ||
2103                          Name == "avx512.mask.cvtuqq2ps.512" ||
2104                          Name == "sse2.cvtps2pd" ||
2105                          Name == "avx.cvt.ps2.pd.256" ||
2106                          Name == "avx512.mask.cvtps2pd.128" ||
2107                          Name == "avx512.mask.cvtps2pd.256")) {
2108       Type *DstTy = CI->getType();
2109       Rep = CI->getArgOperand(0);
2110       Type *SrcTy = Rep->getType();
2111 
2112       unsigned NumDstElts = DstTy->getVectorNumElements();
2113       if (NumDstElts < SrcTy->getVectorNumElements()) {
2114         assert(NumDstElts == 2 && "Unexpected vector size");
2115         uint32_t ShuffleMask[2] = { 0, 1 };
2116         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2117       }
2118 
2119       bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2120       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2121       if (IsPS2PD)
2122         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2123       else if (CI->getNumArgOperands() == 4 &&
2124                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2125                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2126         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2127                                        : Intrinsic::x86_avx512_sitofp_round;
2128         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2129                                                 { DstTy, SrcTy });
2130         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2131       } else {
2132         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2133                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2134       }
2135 
2136       if (CI->getNumArgOperands() >= 3)
2137         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2138                             CI->getArgOperand(1));
2139     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2140       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2141                               CI->getArgOperand(1), CI->getArgOperand(2),
2142                               /*Aligned*/false);
2143     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2144       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2145                               CI->getArgOperand(1),CI->getArgOperand(2),
2146                               /*Aligned*/true);
2147     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2148       Type *ResultTy = CI->getType();
2149       Type *PtrTy = ResultTy->getVectorElementType();
2150 
2151       // Cast the pointer to element type.
2152       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2153                                          llvm::PointerType::getUnqual(PtrTy));
2154 
2155       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2156                                      ResultTy->getVectorNumElements());
2157 
2158       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2159                                                 Intrinsic::masked_expandload,
2160                                                 ResultTy);
2161       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2162     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2163       Type *ResultTy = CI->getArgOperand(1)->getType();
2164       Type *PtrTy = ResultTy->getVectorElementType();
2165 
2166       // Cast the pointer to element type.
2167       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2168                                          llvm::PointerType::getUnqual(PtrTy));
2169 
2170       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2171                                      ResultTy->getVectorNumElements());
2172 
2173       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2174                                                 Intrinsic::masked_compressstore,
2175                                                 ResultTy);
2176       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2177     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2178                          Name.startswith("avx512.mask.expand."))) {
2179       Type *ResultTy = CI->getType();
2180 
2181       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2182                                      ResultTy->getVectorNumElements());
2183 
2184       bool IsCompress = Name[12] == 'c';
2185       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2186                                      : Intrinsic::x86_avx512_mask_expand;
2187       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2188       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2189                                        MaskVec });
2190     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2191       bool IsSigned;
2192       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2193           Name.endswith("uq"))
2194         IsSigned = false;
2195       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2196                Name.endswith("q"))
2197         IsSigned = true;
2198       else
2199         llvm_unreachable("Unknown suffix");
2200 
2201       unsigned Imm;
2202       if (CI->getNumArgOperands() == 3) {
2203         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2204       } else {
2205         Name = Name.substr(9); // strip off "xop.vpcom"
2206         if (Name.startswith("lt"))
2207           Imm = 0;
2208         else if (Name.startswith("le"))
2209           Imm = 1;
2210         else if (Name.startswith("gt"))
2211           Imm = 2;
2212         else if (Name.startswith("ge"))
2213           Imm = 3;
2214         else if (Name.startswith("eq"))
2215           Imm = 4;
2216         else if (Name.startswith("ne"))
2217           Imm = 5;
2218         else if (Name.startswith("false"))
2219           Imm = 6;
2220         else if (Name.startswith("true"))
2221           Imm = 7;
2222         else
2223           llvm_unreachable("Unknown condition");
2224       }
2225 
2226       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2227     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2228       Value *Sel = CI->getArgOperand(2);
2229       Value *NotSel = Builder.CreateNot(Sel);
2230       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2231       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2232       Rep = Builder.CreateOr(Sel0, Sel1);
2233     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2234                          Name.startswith("avx512.prol") ||
2235                          Name.startswith("avx512.mask.prol"))) {
2236       Rep = upgradeX86Rotate(Builder, *CI, false);
2237     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2238                          Name.startswith("avx512.mask.pror"))) {
2239       Rep = upgradeX86Rotate(Builder, *CI, true);
2240     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2241                          Name.startswith("avx512.mask.vpshld") ||
2242                          Name.startswith("avx512.maskz.vpshld"))) {
2243       bool ZeroMask = Name[11] == 'z';
2244       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2245     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2246                          Name.startswith("avx512.mask.vpshrd") ||
2247                          Name.startswith("avx512.maskz.vpshrd"))) {
2248       bool ZeroMask = Name[11] == 'z';
2249       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2250     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2251       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2252                                                Intrinsic::x86_sse42_crc32_32_8);
2253       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2254       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2255       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2256     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2257                          Name.startswith("avx512.vbroadcast.s"))) {
2258       // Replace broadcasts with a series of insertelements.
2259       Type *VecTy = CI->getType();
2260       Type *EltTy = VecTy->getVectorElementType();
2261       unsigned EltNum = VecTy->getVectorNumElements();
2262       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2263                                           EltTy->getPointerTo());
2264       Value *Load = Builder.CreateLoad(EltTy, Cast);
2265       Type *I32Ty = Type::getInt32Ty(C);
2266       Rep = UndefValue::get(VecTy);
2267       for (unsigned I = 0; I < EltNum; ++I)
2268         Rep = Builder.CreateInsertElement(Rep, Load,
2269                                           ConstantInt::get(I32Ty, I));
2270     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2271                          Name.startswith("sse41.pmovzx") ||
2272                          Name.startswith("avx2.pmovsx") ||
2273                          Name.startswith("avx2.pmovzx") ||
2274                          Name.startswith("avx512.mask.pmovsx") ||
2275                          Name.startswith("avx512.mask.pmovzx"))) {
2276       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2277       VectorType *DstTy = cast<VectorType>(CI->getType());
2278       unsigned NumDstElts = DstTy->getNumElements();
2279 
2280       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2281       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2282       for (unsigned i = 0; i != NumDstElts; ++i)
2283         ShuffleMask[i] = i;
2284 
2285       Value *SV = Builder.CreateShuffleVector(
2286           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2287 
2288       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2289       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2290                    : Builder.CreateZExt(SV, DstTy);
2291       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2292       if (CI->getNumArgOperands() == 3)
2293         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2294                             CI->getArgOperand(1));
2295     } else if (Name == "avx512.mask.pmov.qd.256" ||
2296                Name == "avx512.mask.pmov.qd.512" ||
2297                Name == "avx512.mask.pmov.wb.256" ||
2298                Name == "avx512.mask.pmov.wb.512") {
2299       Type *Ty = CI->getArgOperand(1)->getType();
2300       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2301       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2302                           CI->getArgOperand(1));
2303     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2304                          Name == "avx2.vbroadcasti128")) {
2305       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2306       Type *EltTy = CI->getType()->getVectorElementType();
2307       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2308       Type *VT = VectorType::get(EltTy, NumSrcElts);
2309       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2310                                             PointerType::getUnqual(VT));
2311       Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2312       if (NumSrcElts == 2)
2313         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2314                                           { 0, 1, 0, 1 });
2315       else
2316         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2317                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2318     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2319                          Name.startswith("avx512.mask.shuf.f"))) {
2320       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2321       Type *VT = CI->getType();
2322       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2323       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2324       unsigned ControlBitsMask = NumLanes - 1;
2325       unsigned NumControlBits = NumLanes / 2;
2326       SmallVector<uint32_t, 8> ShuffleMask(0);
2327 
2328       for (unsigned l = 0; l != NumLanes; ++l) {
2329         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2330         // We actually need the other source.
2331         if (l >= NumLanes / 2)
2332           LaneMask += NumLanes;
2333         for (unsigned i = 0; i != NumElementsInLane; ++i)
2334           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2335       }
2336       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2337                                         CI->getArgOperand(1), ShuffleMask);
2338       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2339                           CI->getArgOperand(3));
2340     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2341                          Name.startswith("avx512.mask.broadcasti"))) {
2342       unsigned NumSrcElts =
2343                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2344       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2345 
2346       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2347       for (unsigned i = 0; i != NumDstElts; ++i)
2348         ShuffleMask[i] = i % NumSrcElts;
2349 
2350       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2351                                         CI->getArgOperand(0),
2352                                         ShuffleMask);
2353       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2354                           CI->getArgOperand(1));
2355     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2356                          Name.startswith("avx2.vbroadcast") ||
2357                          Name.startswith("avx512.pbroadcast") ||
2358                          Name.startswith("avx512.mask.broadcast.s"))) {
2359       // Replace vp?broadcasts with a vector shuffle.
2360       Value *Op = CI->getArgOperand(0);
2361       unsigned NumElts = CI->getType()->getVectorNumElements();
2362       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2363       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2364                                         Constant::getNullValue(MaskTy));
2365 
2366       if (CI->getNumArgOperands() == 3)
2367         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2368                             CI->getArgOperand(1));
2369     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2370                          Name.startswith("sse2.psubs.") ||
2371                          Name.startswith("avx2.padds.") ||
2372                          Name.startswith("avx2.psubs.") ||
2373                          Name.startswith("avx512.padds.") ||
2374                          Name.startswith("avx512.psubs.") ||
2375                          Name.startswith("avx512.mask.padds.") ||
2376                          Name.startswith("avx512.mask.psubs."))) {
2377       bool IsAdd = Name.contains(".padds");
2378       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2379     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2380                          Name.startswith("sse2.psubus.") ||
2381                          Name.startswith("avx2.paddus.") ||
2382                          Name.startswith("avx2.psubus.") ||
2383                          Name.startswith("avx512.mask.paddus.") ||
2384                          Name.startswith("avx512.mask.psubus."))) {
2385       bool IsAdd = Name.contains(".paddus");
2386       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2387     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2388       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2389                                       CI->getArgOperand(1),
2390                                       CI->getArgOperand(2),
2391                                       CI->getArgOperand(3),
2392                                       CI->getArgOperand(4),
2393                                       false);
2394     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2395       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2396                                       CI->getArgOperand(1),
2397                                       CI->getArgOperand(2),
2398                                       CI->getArgOperand(3),
2399                                       CI->getArgOperand(4),
2400                                       true);
2401     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2402                          Name == "avx2.psll.dq")) {
2403       // 128/256-bit shift left specified in bits.
2404       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2405       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2406                                        Shift / 8); // Shift is in bits.
2407     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2408                          Name == "avx2.psrl.dq")) {
2409       // 128/256-bit shift right specified in bits.
2410       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2411       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2412                                        Shift / 8); // Shift is in bits.
2413     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2414                          Name == "avx2.psll.dq.bs" ||
2415                          Name == "avx512.psll.dq.512")) {
2416       // 128/256/512-bit shift left specified in bytes.
2417       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2418       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2419     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2420                          Name == "avx2.psrl.dq.bs" ||
2421                          Name == "avx512.psrl.dq.512")) {
2422       // 128/256/512-bit shift right specified in bytes.
2423       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2424       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2425     } else if (IsX86 && (Name == "sse41.pblendw" ||
2426                          Name.startswith("sse41.blendp") ||
2427                          Name.startswith("avx.blend.p") ||
2428                          Name == "avx2.pblendw" ||
2429                          Name.startswith("avx2.pblendd."))) {
2430       Value *Op0 = CI->getArgOperand(0);
2431       Value *Op1 = CI->getArgOperand(1);
2432       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2433       VectorType *VecTy = cast<VectorType>(CI->getType());
2434       unsigned NumElts = VecTy->getNumElements();
2435 
2436       SmallVector<uint32_t, 16> Idxs(NumElts);
2437       for (unsigned i = 0; i != NumElts; ++i)
2438         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2439 
2440       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2441     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2442                          Name == "avx2.vinserti128" ||
2443                          Name.startswith("avx512.mask.insert"))) {
2444       Value *Op0 = CI->getArgOperand(0);
2445       Value *Op1 = CI->getArgOperand(1);
2446       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2447       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2448       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2449       unsigned Scale = DstNumElts / SrcNumElts;
2450 
2451       // Mask off the high bits of the immediate value; hardware ignores those.
2452       Imm = Imm % Scale;
2453 
2454       // Extend the second operand into a vector the size of the destination.
2455       Value *UndefV = UndefValue::get(Op1->getType());
2456       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2457       for (unsigned i = 0; i != SrcNumElts; ++i)
2458         Idxs[i] = i;
2459       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2460         Idxs[i] = SrcNumElts;
2461       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2462 
2463       // Insert the second operand into the first operand.
2464 
2465       // Note that there is no guarantee that instruction lowering will actually
2466       // produce a vinsertf128 instruction for the created shuffles. In
2467       // particular, the 0 immediate case involves no lane changes, so it can
2468       // be handled as a blend.
2469 
2470       // Example of shuffle mask for 32-bit elements:
2471       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2472       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2473 
2474       // First fill with identify mask.
2475       for (unsigned i = 0; i != DstNumElts; ++i)
2476         Idxs[i] = i;
2477       // Then replace the elements where we need to insert.
2478       for (unsigned i = 0; i != SrcNumElts; ++i)
2479         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2480       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2481 
2482       // If the intrinsic has a mask operand, handle that.
2483       if (CI->getNumArgOperands() == 5)
2484         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2485                             CI->getArgOperand(3));
2486     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2487                          Name == "avx2.vextracti128" ||
2488                          Name.startswith("avx512.mask.vextract"))) {
2489       Value *Op0 = CI->getArgOperand(0);
2490       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2491       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2492       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2493       unsigned Scale = SrcNumElts / DstNumElts;
2494 
2495       // Mask off the high bits of the immediate value; hardware ignores those.
2496       Imm = Imm % Scale;
2497 
2498       // Get indexes for the subvector of the input vector.
2499       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2500       for (unsigned i = 0; i != DstNumElts; ++i) {
2501         Idxs[i] = i + (Imm * DstNumElts);
2502       }
2503       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2504 
2505       // If the intrinsic has a mask operand, handle that.
2506       if (CI->getNumArgOperands() == 4)
2507         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2508                             CI->getArgOperand(2));
2509     } else if (!IsX86 && Name == "stackprotectorcheck") {
2510       Rep = nullptr;
2511     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2512                          Name.startswith("avx512.mask.perm.di."))) {
2513       Value *Op0 = CI->getArgOperand(0);
2514       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2515       VectorType *VecTy = cast<VectorType>(CI->getType());
2516       unsigned NumElts = VecTy->getNumElements();
2517 
2518       SmallVector<uint32_t, 8> Idxs(NumElts);
2519       for (unsigned i = 0; i != NumElts; ++i)
2520         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2521 
2522       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2523 
2524       if (CI->getNumArgOperands() == 4)
2525         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2526                             CI->getArgOperand(2));
2527     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2528                          Name == "avx2.vperm2i128")) {
2529       // The immediate permute control byte looks like this:
2530       //    [1:0] - select 128 bits from sources for low half of destination
2531       //    [2]   - ignore
2532       //    [3]   - zero low half of destination
2533       //    [5:4] - select 128 bits from sources for high half of destination
2534       //    [6]   - ignore
2535       //    [7]   - zero high half of destination
2536 
2537       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2538 
2539       unsigned NumElts = CI->getType()->getVectorNumElements();
2540       unsigned HalfSize = NumElts / 2;
2541       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2542 
2543       // Determine which operand(s) are actually in use for this instruction.
2544       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2545       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2546 
2547       // If needed, replace operands based on zero mask.
2548       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2549       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2550 
2551       // Permute low half of result.
2552       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2553       for (unsigned i = 0; i < HalfSize; ++i)
2554         ShuffleMask[i] = StartIndex + i;
2555 
2556       // Permute high half of result.
2557       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2558       for (unsigned i = 0; i < HalfSize; ++i)
2559         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2560 
2561       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2562 
2563     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2564                          Name == "sse2.pshuf.d" ||
2565                          Name.startswith("avx512.mask.vpermil.p") ||
2566                          Name.startswith("avx512.mask.pshuf.d."))) {
2567       Value *Op0 = CI->getArgOperand(0);
2568       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2569       VectorType *VecTy = cast<VectorType>(CI->getType());
2570       unsigned NumElts = VecTy->getNumElements();
2571       // Calculate the size of each index in the immediate.
2572       unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2573       unsigned IdxMask = ((1 << IdxSize) - 1);
2574 
2575       SmallVector<uint32_t, 8> Idxs(NumElts);
2576       // Lookup the bits for this element, wrapping around the immediate every
2577       // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2578       // to offset by the first index of each group.
2579       for (unsigned i = 0; i != NumElts; ++i)
2580         Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2581 
2582       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2583 
2584       if (CI->getNumArgOperands() == 4)
2585         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2586                             CI->getArgOperand(2));
2587     } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2588                          Name.startswith("avx512.mask.pshufl.w."))) {
2589       Value *Op0 = CI->getArgOperand(0);
2590       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2591       unsigned NumElts = CI->getType()->getVectorNumElements();
2592 
2593       SmallVector<uint32_t, 16> Idxs(NumElts);
2594       for (unsigned l = 0; l != NumElts; l += 8) {
2595         for (unsigned i = 0; i != 4; ++i)
2596           Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2597         for (unsigned i = 4; i != 8; ++i)
2598           Idxs[i + l] = i + l;
2599       }
2600 
2601       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2602 
2603       if (CI->getNumArgOperands() == 4)
2604         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2605                             CI->getArgOperand(2));
2606     } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2607                          Name.startswith("avx512.mask.pshufh.w."))) {
2608       Value *Op0 = CI->getArgOperand(0);
2609       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2610       unsigned NumElts = CI->getType()->getVectorNumElements();
2611 
2612       SmallVector<uint32_t, 16> Idxs(NumElts);
2613       for (unsigned l = 0; l != NumElts; l += 8) {
2614         for (unsigned i = 0; i != 4; ++i)
2615           Idxs[i + l] = i + l;
2616         for (unsigned i = 0; i != 4; ++i)
2617           Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2618       }
2619 
2620       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2621 
2622       if (CI->getNumArgOperands() == 4)
2623         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2624                             CI->getArgOperand(2));
2625     } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2626       Value *Op0 = CI->getArgOperand(0);
2627       Value *Op1 = CI->getArgOperand(1);
2628       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2629       unsigned NumElts = CI->getType()->getVectorNumElements();
2630 
2631       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2632       unsigned HalfLaneElts = NumLaneElts / 2;
2633 
2634       SmallVector<uint32_t, 16> Idxs(NumElts);
2635       for (unsigned i = 0; i != NumElts; ++i) {
2636         // Base index is the starting element of the lane.
2637         Idxs[i] = i - (i % NumLaneElts);
2638         // If we are half way through the lane switch to the other source.
2639         if ((i % NumLaneElts) >= HalfLaneElts)
2640           Idxs[i] += NumElts;
        // Now select the specific element by adding HalfLaneElts bits from
        // the immediate, wrapping around the immediate every 8-bits.
2643         Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2644       }
2645 
2646       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2647 
2648       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2649                           CI->getArgOperand(3));
2650     } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2651                          Name.startswith("avx512.mask.movshdup") ||
2652                          Name.startswith("avx512.mask.movsldup"))) {
2653       Value *Op0 = CI->getArgOperand(0);
2654       unsigned NumElts = CI->getType()->getVectorNumElements();
2655       unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2656 
2657       unsigned Offset = 0;
2658       if (Name.startswith("avx512.mask.movshdup."))
2659         Offset = 1;
2660 
2661       SmallVector<uint32_t, 16> Idxs(NumElts);
2662       for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2663         for (unsigned i = 0; i != NumLaneElts; i += 2) {
2664           Idxs[i + l + 0] = i + l + Offset;
2665           Idxs[i + l + 1] = i + l + Offset;
2666         }
2667 
2668       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2669 
2670       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2671                           CI->getArgOperand(1));
2672     } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2673                          Name.startswith("avx512.mask.unpckl."))) {
2674       Value *Op0 = CI->getArgOperand(0);
2675       Value *Op1 = CI->getArgOperand(1);
2676       int NumElts = CI->getType()->getVectorNumElements();
2677       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2678 
2679       SmallVector<uint32_t, 64> Idxs(NumElts);
2680       for (int l = 0; l != NumElts; l += NumLaneElts)
2681         for (int i = 0; i != NumLaneElts; ++i)
2682           Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2683 
2684       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2685 
2686       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2687                           CI->getArgOperand(2));
2688     } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2689                          Name.startswith("avx512.mask.unpckh."))) {
2690       Value *Op0 = CI->getArgOperand(0);
2691       Value *Op1 = CI->getArgOperand(1);
2692       int NumElts = CI->getType()->getVectorNumElements();
2693       int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2694 
2695       SmallVector<uint32_t, 64> Idxs(NumElts);
2696       for (int l = 0; l != NumElts; l += NumLaneElts)
2697         for (int i = 0; i != NumLaneElts; ++i)
2698           Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2699 
2700       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2701 
2702       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2703                           CI->getArgOperand(2));
2704     } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2705                          Name.startswith("avx512.mask.pand."))) {
2706       VectorType *FTy = cast<VectorType>(CI->getType());
2707       VectorType *ITy = VectorType::getInteger(FTy);
2708       Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2709                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2710       Rep = Builder.CreateBitCast(Rep, FTy);
2711       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2712                           CI->getArgOperand(2));
2713     } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2714                          Name.startswith("avx512.mask.pandn."))) {
2715       VectorType *FTy = cast<VectorType>(CI->getType());
2716       VectorType *ITy = VectorType::getInteger(FTy);
2717       Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2718       Rep = Builder.CreateAnd(Rep,
2719                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2720       Rep = Builder.CreateBitCast(Rep, FTy);
2721       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2722                           CI->getArgOperand(2));
2723     } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2724                          Name.startswith("avx512.mask.por."))) {
2725       VectorType *FTy = cast<VectorType>(CI->getType());
2726       VectorType *ITy = VectorType::getInteger(FTy);
2727       Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2728                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2729       Rep = Builder.CreateBitCast(Rep, FTy);
2730       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2731                           CI->getArgOperand(2));
2732     } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2733                          Name.startswith("avx512.mask.pxor."))) {
2734       VectorType *FTy = cast<VectorType>(CI->getType());
2735       VectorType *ITy = VectorType::getInteger(FTy);
2736       Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2737                               Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2738       Rep = Builder.CreateBitCast(Rep, FTy);
2739       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2740                           CI->getArgOperand(2));
2741     } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2742       Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2743       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2744                           CI->getArgOperand(2));
2745     } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2746       Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2747       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2748                           CI->getArgOperand(2));
2749     } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2750       Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2751       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2752                           CI->getArgOperand(2));
2753     } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2754       if (Name.endswith(".512")) {
2755         Intrinsic::ID IID;
2756         if (Name[17] == 's')
2757           IID = Intrinsic::x86_avx512_add_ps_512;
2758         else
2759           IID = Intrinsic::x86_avx512_add_pd_512;
2760 
2761         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2762                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2763                                    CI->getArgOperand(4) });
2764       } else {
2765         Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2766       }
2767       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2768                           CI->getArgOperand(2));
2769     } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2770       if (Name.endswith(".512")) {
2771         Intrinsic::ID IID;
2772         if (Name[17] == 's')
2773           IID = Intrinsic::x86_avx512_div_ps_512;
2774         else
2775           IID = Intrinsic::x86_avx512_div_pd_512;
2776 
2777         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2778                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2779                                    CI->getArgOperand(4) });
2780       } else {
2781         Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2782       }
2783       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2784                           CI->getArgOperand(2));
2785     } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2786       if (Name.endswith(".512")) {
2787         Intrinsic::ID IID;
2788         if (Name[17] == 's')
2789           IID = Intrinsic::x86_avx512_mul_ps_512;
2790         else
2791           IID = Intrinsic::x86_avx512_mul_pd_512;
2792 
2793         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2794                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2795                                    CI->getArgOperand(4) });
2796       } else {
2797         Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2798       }
2799       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2800                           CI->getArgOperand(2));
2801     } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2802       if (Name.endswith(".512")) {
2803         Intrinsic::ID IID;
2804         if (Name[17] == 's')
2805           IID = Intrinsic::x86_avx512_sub_ps_512;
2806         else
2807           IID = Intrinsic::x86_avx512_sub_pd_512;
2808 
2809         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2810                                  { CI->getArgOperand(0), CI->getArgOperand(1),
2811                                    CI->getArgOperand(4) });
2812       } else {
2813         Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2814       }
2815       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2816                           CI->getArgOperand(2));
2817     } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2818                          Name.startswith("avx512.mask.min.p")) &&
2819                Name.drop_front(18) == ".512") {
2820       bool IsDouble = Name[17] == 'd';
2821       bool IsMin = Name[13] == 'i';
2822       static const Intrinsic::ID MinMaxTbl[2][2] = {
2823         { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2824         { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2825       };
2826       Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2827 
2828       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2829                                { CI->getArgOperand(0), CI->getArgOperand(1),
2830                                  CI->getArgOperand(4) });
2831       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2832                           CI->getArgOperand(2));
2833     } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2834       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2835                                                          Intrinsic::ctlz,
2836                                                          CI->getType()),
2837                                { CI->getArgOperand(0), Builder.getInt1(false) });
2838       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2839                           CI->getArgOperand(1));
2840     } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2841       bool IsImmediate = Name[16] == 'i' ||
2842                          (Name.size() > 18 && Name[18] == 'i');
2843       bool IsVariable = Name[16] == 'v';
2844       char Size = Name[16] == '.' ? Name[17] :
2845                   Name[17] == '.' ? Name[18] :
2846                   Name[18] == '.' ? Name[19] :
2847                                     Name[20];
2848 
2849       Intrinsic::ID IID;
2850       if (IsVariable && Name[17] != '.') {
2851         if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2852           IID = Intrinsic::x86_avx2_psllv_q;
2853         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2854           IID = Intrinsic::x86_avx2_psllv_q_256;
2855         else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2856           IID = Intrinsic::x86_avx2_psllv_d;
2857         else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2858           IID = Intrinsic::x86_avx2_psllv_d_256;
2859         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2860           IID = Intrinsic::x86_avx512_psllv_w_128;
2861         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2862           IID = Intrinsic::x86_avx512_psllv_w_256;
2863         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2864           IID = Intrinsic::x86_avx512_psllv_w_512;
2865         else
2866           llvm_unreachable("Unexpected size");
2867       } else if (Name.endswith(".128")) {
2868         if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2869           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2870                             : Intrinsic::x86_sse2_psll_d;
2871         else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2872           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2873                             : Intrinsic::x86_sse2_psll_q;
2874         else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2875           IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2876                             : Intrinsic::x86_sse2_psll_w;
2877         else
2878           llvm_unreachable("Unexpected size");
2879       } else if (Name.endswith(".256")) {
2880         if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2881           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2882                             : Intrinsic::x86_avx2_psll_d;
2883         else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2884           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2885                             : Intrinsic::x86_avx2_psll_q;
2886         else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2887           IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2888                             : Intrinsic::x86_avx2_psll_w;
2889         else
2890           llvm_unreachable("Unexpected size");
2891       } else {
2892         if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2893           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2894                 IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
2895                               Intrinsic::x86_avx512_psll_d_512;
2896         else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2897           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2898                 IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
2899                               Intrinsic::x86_avx512_psll_q_512;
2900         else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2901           IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2902                             : Intrinsic::x86_avx512_psll_w_512;
2903         else
2904           llvm_unreachable("Unexpected size");
2905       }
2906 
2907       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2908     } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2909       bool IsImmediate = Name[16] == 'i' ||
2910                          (Name.size() > 18 && Name[18] == 'i');
2911       bool IsVariable = Name[16] == 'v';
2912       char Size = Name[16] == '.' ? Name[17] :
2913                   Name[17] == '.' ? Name[18] :
2914                   Name[18] == '.' ? Name[19] :
2915                                     Name[20];
2916 
2917       Intrinsic::ID IID;
2918       if (IsVariable && Name[17] != '.') {
2919         if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2920           IID = Intrinsic::x86_avx2_psrlv_q;
2921         else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2922           IID = Intrinsic::x86_avx2_psrlv_q_256;
2923         else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2924           IID = Intrinsic::x86_avx2_psrlv_d;
2925         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2926           IID = Intrinsic::x86_avx2_psrlv_d_256;
2927         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2928           IID = Intrinsic::x86_avx512_psrlv_w_128;
2929         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2930           IID = Intrinsic::x86_avx512_psrlv_w_256;
2931         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2932           IID = Intrinsic::x86_avx512_psrlv_w_512;
2933         else
2934           llvm_unreachable("Unexpected size");
2935       } else if (Name.endswith(".128")) {
2936         if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2937           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2938                             : Intrinsic::x86_sse2_psrl_d;
2939         else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2940           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2941                             : Intrinsic::x86_sse2_psrl_q;
2942         else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2943           IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2944                             : Intrinsic::x86_sse2_psrl_w;
2945         else
2946           llvm_unreachable("Unexpected size");
2947       } else if (Name.endswith(".256")) {
2948         if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2949           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2950                             : Intrinsic::x86_avx2_psrl_d;
2951         else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2952           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2953                             : Intrinsic::x86_avx2_psrl_q;
2954         else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2955           IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2956                             : Intrinsic::x86_avx2_psrl_w;
2957         else
2958           llvm_unreachable("Unexpected size");
2959       } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
2961           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2962                 IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
2963                               Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
2965           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2966                 IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
2967                               Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
2969           IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2970                             : Intrinsic::x86_avx512_psrl_w_512;
2971         else
2972           llvm_unreachable("Unexpected size");
2973       }
2974 
2975       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2976     } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2977       bool IsImmediate = Name[16] == 'i' ||
2978                          (Name.size() > 18 && Name[18] == 'i');
2979       bool IsVariable = Name[16] == 'v';
2980       char Size = Name[16] == '.' ? Name[17] :
2981                   Name[17] == '.' ? Name[18] :
2982                   Name[18] == '.' ? Name[19] :
2983                                     Name[20];
2984 
2985       Intrinsic::ID IID;
2986       if (IsVariable && Name[17] != '.') {
2987         if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2988           IID = Intrinsic::x86_avx2_psrav_d;
2989         else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2990           IID = Intrinsic::x86_avx2_psrav_d_256;
2991         else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2992           IID = Intrinsic::x86_avx512_psrav_w_128;
2993         else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2994           IID = Intrinsic::x86_avx512_psrav_w_256;
2995         else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2996           IID = Intrinsic::x86_avx512_psrav_w_512;
2997         else
2998           llvm_unreachable("Unexpected size");
2999       } else if (Name.endswith(".128")) {
3000         if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3001           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3002                             : Intrinsic::x86_sse2_psra_d;
3003         else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3004           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3005                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
3006                               Intrinsic::x86_avx512_psra_q_128;
3007         else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3008           IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3009                             : Intrinsic::x86_sse2_psra_w;
3010         else
3011           llvm_unreachable("Unexpected size");
3012       } else if (Name.endswith(".256")) {
3013         if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3014           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3015                             : Intrinsic::x86_avx2_psra_d;
3016         else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3017           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3018                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
3019                               Intrinsic::x86_avx512_psra_q_256;
3020         else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3021           IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3022                             : Intrinsic::x86_avx2_psra_w;
3023         else
3024           llvm_unreachable("Unexpected size");
3025       } else {
3026         if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3027           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3028                 IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
3029                               Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
3031           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3032                 IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
3033                               Intrinsic::x86_avx512_psra_q_512;
3034         else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3035           IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3036                             : Intrinsic::x86_avx512_psra_w_512;
3037         else
3038           llvm_unreachable("Unexpected size");
3039       }
3040 
3041       Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
3042     } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
3043       Rep = upgradeMaskedMove(Builder, *CI);
3044     } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
3045       Rep = UpgradeMaskToInt(Builder, *CI);
3046     } else if (IsX86 && Name.endswith(".movntdqa")) {
3047       Module *M = F->getParent();
3048       MDNode *Node = MDNode::get(
3049           C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3050 
3051       Value *Ptr = CI->getArgOperand(0);
3052       VectorType *VTy = cast<VectorType>(CI->getType());
3053 
3054       // Convert the type of the pointer to a pointer to the stored type.
3055       Value *BC =
3056           Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
3057       LoadInst *LI =
3058           Builder.CreateAlignedLoad(VTy, BC, Align(VTy->getBitWidth() / 8));
3059       LI->setMetadata(M->getMDKindID("nontemporal"), Node);
3060       Rep = LI;
3061     } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
3062                          Name.startswith("fma.vfmsub.") ||
3063                          Name.startswith("fma.vfnmadd.") ||
3064                          Name.startswith("fma.vfnmsub."))) {
3065       bool NegMul = Name[6] == 'n';
3066       bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3067       bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3068 
3069       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3070                        CI->getArgOperand(2) };
3071 
3072       if (IsScalar) {
3073         Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3074         Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3075         Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3076       }
3077 
3078       if (NegMul && !IsScalar)
3079         Ops[0] = Builder.CreateFNeg(Ops[0]);
3080       if (NegMul && IsScalar)
3081         Ops[1] = Builder.CreateFNeg(Ops[1]);
3082       if (NegAcc)
3083         Ops[2] = Builder.CreateFNeg(Ops[2]);
3084 
3085       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3086                                                          Intrinsic::fma,
3087                                                          Ops[0]->getType()),
3088                                Ops);
3089 
3090       if (IsScalar)
3091         Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3092                                           (uint64_t)0);
3093     } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3094       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3095                        CI->getArgOperand(2) };
3096 
3097       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3098       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3099       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3100 
3101       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3102                                                          Intrinsic::fma,
3103                                                          Ops[0]->getType()),
3104                                Ops);
3105 
3106       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3107                                         Rep, (uint64_t)0);
3108     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3109                          Name.startswith("avx512.maskz.vfmadd.s") ||
3110                          Name.startswith("avx512.mask3.vfmadd.s") ||
3111                          Name.startswith("avx512.mask3.vfmsub.s") ||
3112                          Name.startswith("avx512.mask3.vfnmsub.s"))) {
3113       bool IsMask3 = Name[11] == '3';
3114       bool IsMaskZ = Name[11] == 'z';
3115       // Drop the "avx512.mask." to make it easier.
3116       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3117       bool NegMul = Name[2] == 'n';
3118       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3119 
3120       Value *A = CI->getArgOperand(0);
3121       Value *B = CI->getArgOperand(1);
3122       Value *C = CI->getArgOperand(2);
3123 
3124       if (NegMul && (IsMask3 || IsMaskZ))
3125         A = Builder.CreateFNeg(A);
3126       if (NegMul && !(IsMask3 || IsMaskZ))
3127         B = Builder.CreateFNeg(B);
3128       if (NegAcc)
3129         C = Builder.CreateFNeg(C);
3130 
3131       A = Builder.CreateExtractElement(A, (uint64_t)0);
3132       B = Builder.CreateExtractElement(B, (uint64_t)0);
3133       C = Builder.CreateExtractElement(C, (uint64_t)0);
3134 
3135       if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3136           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3137         Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3138 
3139         Intrinsic::ID IID;
3140         if (Name.back() == 'd')
3141           IID = Intrinsic::x86_avx512_vfmadd_f64;
3142         else
3143           IID = Intrinsic::x86_avx512_vfmadd_f32;
3144         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3145         Rep = Builder.CreateCall(FMA, Ops);
3146       } else {
3147         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3148                                                   Intrinsic::fma,
3149                                                   A->getType());
3150         Rep = Builder.CreateCall(FMA, { A, B, C });
3151       }
3152 
3153       Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3154                         IsMask3 ? C : A;
3155 
3156       // For Mask3 with NegAcc, we need to create a new extractelement that
3157       // avoids the negation above.
3158       if (NegAcc && IsMask3)
3159         PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3160                                                 (uint64_t)0);
3161 
3162       Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3163                                 Rep, PassThru);
3164       Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3165                                         Rep, (uint64_t)0);
3166     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3167                          Name.startswith("avx512.mask.vfnmadd.p") ||
3168                          Name.startswith("avx512.mask.vfnmsub.p") ||
3169                          Name.startswith("avx512.mask3.vfmadd.p") ||
3170                          Name.startswith("avx512.mask3.vfmsub.p") ||
3171                          Name.startswith("avx512.mask3.vfnmsub.p") ||
3172                          Name.startswith("avx512.maskz.vfmadd.p"))) {
3173       bool IsMask3 = Name[11] == '3';
3174       bool IsMaskZ = Name[11] == 'z';
3175       // Drop the "avx512.mask." to make it easier.
3176       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3177       bool NegMul = Name[2] == 'n';
3178       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3179 
3180       Value *A = CI->getArgOperand(0);
3181       Value *B = CI->getArgOperand(1);
3182       Value *C = CI->getArgOperand(2);
3183 
3184       if (NegMul && (IsMask3 || IsMaskZ))
3185         A = Builder.CreateFNeg(A);
3186       if (NegMul && !(IsMask3 || IsMaskZ))
3187         B = Builder.CreateFNeg(B);
3188       if (NegAcc)
3189         C = Builder.CreateFNeg(C);
3190 
3191       if (CI->getNumArgOperands() == 5 &&
3192           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3193            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3194         Intrinsic::ID IID;
3195         // Check the character before ".512" in string.
3196         if (Name[Name.size()-5] == 's')
3197           IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3198         else
3199           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3200 
3201         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3202                                  { A, B, C, CI->getArgOperand(4) });
3203       } else {
3204         Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3205                                                   Intrinsic::fma,
3206                                                   A->getType());
3207         Rep = Builder.CreateCall(FMA, { A, B, C });
3208       }
3209 
3210       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3211                         IsMask3 ? CI->getArgOperand(2) :
3212                                   CI->getArgOperand(0);
3213 
3214       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3215     } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3216                          Name.startswith("fma.vfmsubadd.p"))) {
3217       bool IsSubAdd = Name[7] == 's';
3218       int NumElts = CI->getType()->getVectorNumElements();
3219 
3220       Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3221                        CI->getArgOperand(2) };
3222 
3223       Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3224                                                 Ops[0]->getType());
3225       Value *Odd = Builder.CreateCall(FMA, Ops);
3226       Ops[2] = Builder.CreateFNeg(Ops[2]);
3227       Value *Even = Builder.CreateCall(FMA, Ops);
3228 
3229       if (IsSubAdd)
3230         std::swap(Even, Odd);
3231 
3232       SmallVector<uint32_t, 32> Idxs(NumElts);
3233       for (int i = 0; i != NumElts; ++i)
3234         Idxs[i] = i + (i % 2) * NumElts;
3235 
3236       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3237     } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3238                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3239                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3240                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3241       bool IsMask3 = Name[11] == '3';
3242       bool IsMaskZ = Name[11] == 'z';
3243       // Drop the "avx512.mask." to make it easier.
3244       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3245       bool IsSubAdd = Name[3] == 's';
3246       if (CI->getNumArgOperands() == 5 &&
3247           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3248            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3249         Intrinsic::ID IID;
3250         // Check the character before ".512" in string.
3251         if (Name[Name.size()-5] == 's')
3252           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3253         else
3254           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3255 
3256         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3257                          CI->getArgOperand(2), CI->getArgOperand(4) };
3258         if (IsSubAdd)
3259           Ops[2] = Builder.CreateFNeg(Ops[2]);
3260 
3261         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3262                                  Ops);
3263       } else {
3264         int NumElts = CI->getType()->getVectorNumElements();
3265 
3266         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3267                          CI->getArgOperand(2) };
3268 
3269         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3270                                                   Ops[0]->getType());
3271         Value *Odd = Builder.CreateCall(FMA, Ops);
3272         Ops[2] = Builder.CreateFNeg(Ops[2]);
3273         Value *Even = Builder.CreateCall(FMA, Ops);
3274 
3275         if (IsSubAdd)
3276           std::swap(Even, Odd);
3277 
3278         SmallVector<uint32_t, 32> Idxs(NumElts);
3279         for (int i = 0; i != NumElts; ++i)
3280           Idxs[i] = i + (i % 2) * NumElts;
3281 
3282         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3283       }
3284 
3285       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3286                         IsMask3 ? CI->getArgOperand(2) :
3287                                   CI->getArgOperand(0);
3288 
3289       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3290     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3291                          Name.startswith("avx512.maskz.pternlog."))) {
3292       bool ZeroMask = Name[11] == 'z';
3293       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3294       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3295       Intrinsic::ID IID;
3296       if (VecWidth == 128 && EltWidth == 32)
3297         IID = Intrinsic::x86_avx512_pternlog_d_128;
3298       else if (VecWidth == 256 && EltWidth == 32)
3299         IID = Intrinsic::x86_avx512_pternlog_d_256;
3300       else if (VecWidth == 512 && EltWidth == 32)
3301         IID = Intrinsic::x86_avx512_pternlog_d_512;
3302       else if (VecWidth == 128 && EltWidth == 64)
3303         IID = Intrinsic::x86_avx512_pternlog_q_128;
3304       else if (VecWidth == 256 && EltWidth == 64)
3305         IID = Intrinsic::x86_avx512_pternlog_q_256;
3306       else if (VecWidth == 512 && EltWidth == 64)
3307         IID = Intrinsic::x86_avx512_pternlog_q_512;
3308       else
3309         llvm_unreachable("Unexpected intrinsic");
3310 
3311       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3312                         CI->getArgOperand(2), CI->getArgOperand(3) };
3313       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3314                                Args);
3315       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3316                                  : CI->getArgOperand(0);
3317       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3318     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3319                          Name.startswith("avx512.maskz.vpmadd52"))) {
3320       bool ZeroMask = Name[11] == 'z';
3321       bool High = Name[20] == 'h' || Name[21] == 'h';
3322       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3323       Intrinsic::ID IID;
3324       if (VecWidth == 128 && !High)
3325         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3326       else if (VecWidth == 256 && !High)
3327         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3328       else if (VecWidth == 512 && !High)
3329         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3330       else if (VecWidth == 128 && High)
3331         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3332       else if (VecWidth == 256 && High)
3333         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3334       else if (VecWidth == 512 && High)
3335         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3336       else
3337         llvm_unreachable("Unexpected intrinsic");
3338 
3339       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3340                         CI->getArgOperand(2) };
3341       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3342                                Args);
3343       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3344                                  : CI->getArgOperand(0);
3345       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3346     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3347                          Name.startswith("avx512.mask.vpermt2var.") ||
3348                          Name.startswith("avx512.maskz.vpermt2var."))) {
3349       bool ZeroMask = Name[11] == 'z';
3350       bool IndexForm = Name[17] == 'i';
3351       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3352     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3353                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3354                          Name.startswith("avx512.mask.vpdpbusds.") ||
3355                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3356       bool ZeroMask = Name[11] == 'z';
3357       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3358       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3359       Intrinsic::ID IID;
3360       if (VecWidth == 128 && !IsSaturating)
3361         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3362       else if (VecWidth == 256 && !IsSaturating)
3363         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3364       else if (VecWidth == 512 && !IsSaturating)
3365         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3366       else if (VecWidth == 128 && IsSaturating)
3367         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3368       else if (VecWidth == 256 && IsSaturating)
3369         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3370       else if (VecWidth == 512 && IsSaturating)
3371         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3372       else
3373         llvm_unreachable("Unexpected intrinsic");
3374 
3375       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3376                         CI->getArgOperand(2)  };
3377       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3378                                Args);
3379       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3380                                  : CI->getArgOperand(0);
3381       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3382     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3383                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3384                          Name.startswith("avx512.mask.vpdpwssds.") ||
3385                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3386       bool ZeroMask = Name[11] == 'z';
3387       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3388       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3389       Intrinsic::ID IID;
3390       if (VecWidth == 128 && !IsSaturating)
3391         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3392       else if (VecWidth == 256 && !IsSaturating)
3393         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3394       else if (VecWidth == 512 && !IsSaturating)
3395         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3396       else if (VecWidth == 128 && IsSaturating)
3397         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3398       else if (VecWidth == 256 && IsSaturating)
3399         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3400       else if (VecWidth == 512 && IsSaturating)
3401         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3402       else
3403         llvm_unreachable("Unexpected intrinsic");
3404 
3405       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3406                         CI->getArgOperand(2)  };
3407       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3408                                Args);
3409       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3410                                  : CI->getArgOperand(0);
3411       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3412     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3413                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3414                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3415       Intrinsic::ID IID;
3416       if (Name[0] == 'a' && Name.back() == '2')
3417         IID = Intrinsic::x86_addcarry_32;
3418       else if (Name[0] == 'a' && Name.back() == '4')
3419         IID = Intrinsic::x86_addcarry_64;
3420       else if (Name[0] == 's' && Name.back() == '2')
3421         IID = Intrinsic::x86_subborrow_32;
3422       else if (Name[0] == 's' && Name.back() == '4')
3423         IID = Intrinsic::x86_subborrow_64;
3424       else
3425         llvm_unreachable("Unexpected intrinsic");
3426 
3427       // Make a call with 3 operands.
3428       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3429                         CI->getArgOperand(2)};
3430       Value *NewCall = Builder.CreateCall(
3431                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3432                                 Args);
3433 
3434       // Extract the second result and store it.
3435       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3436       // Cast the pointer to the right type.
3437       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3438                                  llvm::PointerType::getUnqual(Data->getType()));
3439       Builder.CreateAlignedStore(Data, Ptr, Align(1));
3440       // Replace the original call result with the first result of the new call.
3441       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3442 
3443       CI->replaceAllUsesWith(CF);
3444       Rep = nullptr;
3445     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3446                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3447       // Rep will be updated by the call in the condition.
3448     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3449       Value *Arg = CI->getArgOperand(0);
3450       Value *Neg = Builder.CreateNeg(Arg, "neg");
3451       Value *Cmp = Builder.CreateICmpSGE(
3452           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3453       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3454     } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3455                           Name.startswith("atomic.load.add.f64.p"))) {
3456       Value *Ptr = CI->getArgOperand(0);
3457       Value *Val = CI->getArgOperand(1);
3458       Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3459                                     AtomicOrdering::SequentiallyConsistent);
3460     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3461                           Name == "max.ui" || Name == "max.ull")) {
3462       Value *Arg0 = CI->getArgOperand(0);
3463       Value *Arg1 = CI->getArgOperand(1);
3464       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3465                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3466                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3467       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3468     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3469                           Name == "min.ui" || Name == "min.ull")) {
3470       Value *Arg0 = CI->getArgOperand(0);
3471       Value *Arg1 = CI->getArgOperand(1);
3472       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3473                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3474                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3475       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3476     } else if (IsNVVM && Name == "clz.ll") {
3477       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3478       Value *Arg = CI->getArgOperand(0);
3479       Value *Ctlz = Builder.CreateCall(
3480           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3481                                     {Arg->getType()}),
3482           {Arg, Builder.getFalse()}, "ctlz");
3483       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3484     } else if (IsNVVM && Name == "popc.ll") {
3485       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3486       // i64.
3487       Value *Arg = CI->getArgOperand(0);
3488       Value *Popc = Builder.CreateCall(
3489           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3490                                     {Arg->getType()}),
3491           Arg, "ctpop");
3492       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3493     } else if (IsNVVM && Name == "h2f") {
3494       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3495                                    F->getParent(), Intrinsic::convert_from_fp16,
3496                                    {Builder.getFloatTy()}),
3497                                CI->getArgOperand(0), "h2f");
3498     } else {
3499       llvm_unreachable("Unknown function for CallInst upgrade.");
3500     }
3501 
3502     if (Rep)
3503       CI->replaceAllUsesWith(Rep);
3504     CI->eraseFromParent();
3505     return;
3506   }
3507 
3508   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3509     // Handle generic mangling change, but nothing else
3510     assert(
3511         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3512         "Unknown function for CallInst upgrade and isn't just a name change");
3513     CI->setCalledFunction(NewFn);
3514   };
3515   CallInst *NewCall = nullptr;
3516   switch (NewFn->getIntrinsicID()) {
3517   default: {
3518     DefaultCase();
3519     return;
3520   }
3521   case Intrinsic::experimental_vector_reduce_v2_fmul: {
3522     SmallVector<Value *, 2> Args;
3523     if (CI->isFast())
3524       Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3525     else
3526       Args.push_back(CI->getOperand(0));
3527     Args.push_back(CI->getOperand(1));
3528     NewCall = Builder.CreateCall(NewFn, Args);
3529     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3530     break;
3531   }
3532   case Intrinsic::experimental_vector_reduce_v2_fadd: {
3533     SmallVector<Value *, 2> Args;
3534     if (CI->isFast())
3535       Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3536     else
3537       Args.push_back(CI->getOperand(0));
3538     Args.push_back(CI->getOperand(1));
3539     NewCall = Builder.CreateCall(NewFn, Args);
3540     cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3541     break;
3542   }
3543   case Intrinsic::arm_neon_vld1:
3544   case Intrinsic::arm_neon_vld2:
3545   case Intrinsic::arm_neon_vld3:
3546   case Intrinsic::arm_neon_vld4:
3547   case Intrinsic::arm_neon_vld2lane:
3548   case Intrinsic::arm_neon_vld3lane:
3549   case Intrinsic::arm_neon_vld4lane:
3550   case Intrinsic::arm_neon_vst1:
3551   case Intrinsic::arm_neon_vst2:
3552   case Intrinsic::arm_neon_vst3:
3553   case Intrinsic::arm_neon_vst4:
3554   case Intrinsic::arm_neon_vst2lane:
3555   case Intrinsic::arm_neon_vst3lane:
3556   case Intrinsic::arm_neon_vst4lane: {
3557     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3558                                  CI->arg_operands().end());
3559     NewCall = Builder.CreateCall(NewFn, Args);
3560     break;
3561   }
3562 
3563   case Intrinsic::bitreverse:
3564     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3565     break;
3566 
3567   case Intrinsic::ctlz:
3568   case Intrinsic::cttz:
3569     assert(CI->getNumArgOperands() == 1 &&
3570            "Mismatch between function args and call args");
3571     NewCall =
3572         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3573     break;
3574 
3575   case Intrinsic::objectsize: {
3576     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3577                                    ? Builder.getFalse()
3578                                    : CI->getArgOperand(2);
3579     Value *Dynamic =
3580         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3581     NewCall = Builder.CreateCall(
3582         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3583     break;
3584   }
3585 
3586   case Intrinsic::ctpop:
3587     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3588     break;
3589 
3590   case Intrinsic::convert_from_fp16:
3591     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3592     break;
3593 
3594   case Intrinsic::dbg_value:
3595     // Upgrade from the old version that had an extra offset argument.
3596     assert(CI->getNumArgOperands() == 4);
3597     // Drop nonzero offsets instead of attempting to upgrade them.
3598     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3599       if (Offset->isZeroValue()) {
3600         NewCall = Builder.CreateCall(
3601             NewFn,
3602             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3603         break;
3604       }
3605     CI->eraseFromParent();
3606     return;
3607 
3608   case Intrinsic::x86_xop_vfrcz_ss:
3609   case Intrinsic::x86_xop_vfrcz_sd:
3610     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3611     break;
3612 
3613   case Intrinsic::x86_xop_vpermil2pd:
3614   case Intrinsic::x86_xop_vpermil2ps:
3615   case Intrinsic::x86_xop_vpermil2pd_256:
3616   case Intrinsic::x86_xop_vpermil2ps_256: {
3617     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3618                                  CI->arg_operands().end());
3619     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3620     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3621     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3622     NewCall = Builder.CreateCall(NewFn, Args);
3623     break;
3624   }
3625 
3626   case Intrinsic::x86_sse41_ptestc:
3627   case Intrinsic::x86_sse41_ptestz:
3628   case Intrinsic::x86_sse41_ptestnzc: {
3629     // The arguments for these intrinsics used to be v4f32, and changed
3630     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3631     // So, the only thing required is a bitcast for both arguments.
3632     // First, check the arguments have the old type.
3633     Value *Arg0 = CI->getArgOperand(0);
3634     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3635       return;
3636 
3637     // Old intrinsic, add bitcasts
3638     Value *Arg1 = CI->getArgOperand(1);
3639 
3640     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3641 
3642     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3643     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3644 
3645     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3646     break;
3647   }
3648 
3649   case Intrinsic::x86_rdtscp: {
3650     // This used to take 1 arguments. If we have no arguments, it is already
3651     // upgraded.
3652     if (CI->getNumOperands() == 0)
3653       return;
3654 
3655     NewCall = Builder.CreateCall(NewFn);
3656     // Extract the second result and store it.
3657     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3658     // Cast the pointer to the right type.
3659     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3660                                  llvm::PointerType::getUnqual(Data->getType()));
3661     Builder.CreateAlignedStore(Data, Ptr, Align(1));
3662     // Replace the original call result with the first result of the new call.
3663     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3664 
3665     std::string Name = std::string(CI->getName());
3666     if (!Name.empty()) {
3667       CI->setName(Name + ".old");
3668       NewCall->setName(Name);
3669     }
3670     CI->replaceAllUsesWith(TSC);
3671     CI->eraseFromParent();
3672     return;
3673   }
3674 
3675   case Intrinsic::x86_sse41_insertps:
3676   case Intrinsic::x86_sse41_dppd:
3677   case Intrinsic::x86_sse41_dpps:
3678   case Intrinsic::x86_sse41_mpsadbw:
3679   case Intrinsic::x86_avx_dp_ps_256:
3680   case Intrinsic::x86_avx2_mpsadbw: {
3681     // Need to truncate the last argument from i32 to i8 -- this argument models
3682     // an inherently 8-bit immediate operand to these x86 instructions.
3683     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3684                                  CI->arg_operands().end());
3685 
3686     // Replace the last argument with a trunc.
3687     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3688     NewCall = Builder.CreateCall(NewFn, Args);
3689     break;
3690   }
3691 
3692   case Intrinsic::thread_pointer: {
3693     NewCall = Builder.CreateCall(NewFn, {});
3694     break;
3695   }
3696 
3697   case Intrinsic::invariant_start:
3698   case Intrinsic::invariant_end:
3699   case Intrinsic::masked_load:
3700   case Intrinsic::masked_store:
3701   case Intrinsic::masked_gather:
3702   case Intrinsic::masked_scatter: {
3703     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3704                                  CI->arg_operands().end());
3705     NewCall = Builder.CreateCall(NewFn, Args);
3706     break;
3707   }
3708 
3709   case Intrinsic::memcpy:
3710   case Intrinsic::memmove:
3711   case Intrinsic::memset: {
3712     // We have to make sure that the call signature is what we're expecting.
3713     // We only want to change the old signatures by removing the alignment arg:
3714     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3715     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3716     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3717     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3718     // Note: i8*'s in the above can be any pointer type
3719     if (CI->getNumArgOperands() != 5) {
3720       DefaultCase();
3721       return;
3722     }
3723     // Remove alignment argument (3), and add alignment attributes to the
3724     // dest/src pointers.
3725     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3726                       CI->getArgOperand(2), CI->getArgOperand(4)};
3727     NewCall = Builder.CreateCall(NewFn, Args);
3728     auto *MemCI = cast<MemIntrinsic>(NewCall);
3729     // All mem intrinsics support dest alignment.
3730     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3731     MemCI->setDestAlignment(Align->getZExtValue());
3732     // Memcpy/Memmove also support source alignment.
3733     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3734       MTI->setSourceAlignment(Align->getZExtValue());
3735     break;
3736   }
3737   }
3738   assert(NewCall && "Should have either set this variable or returned through "
3739                     "the default case");
3740   std::string Name = std::string(CI->getName());
3741   if (!Name.empty()) {
3742     CI->setName(Name + ".old");
3743     NewCall->setName(Name);
3744   }
3745   CI->replaceAllUsesWith(NewCall);
3746   CI->eraseFromParent();
3747 }
3748 
3749 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3750   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3751 
3752   // Check if this function should be upgraded and get the replacement function
3753   // if there is one.
3754   Function *NewFn;
3755   if (UpgradeIntrinsicFunction(F, NewFn)) {
3756     // Replace all users of the old function with the new function or new
3757     // instructions. This is not a range loop because the call is deleted.
3758     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3759       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3760         UpgradeIntrinsicCall(CI, NewFn);
3761 
3762     // Remove old function, no longer used, from the module.
3763     F->eraseFromParent();
3764   }
3765 }
3766 
3767 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3768   // Check if the tag uses struct-path aware TBAA format.
3769   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3770     return &MD;
3771 
3772   auto &Context = MD.getContext();
3773   if (MD.getNumOperands() == 3) {
3774     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3775     MDNode *ScalarType = MDNode::get(Context, Elts);
3776     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3777     Metadata *Elts2[] = {ScalarType, ScalarType,
3778                          ConstantAsMetadata::get(
3779                              Constant::getNullValue(Type::getInt64Ty(Context))),
3780                          MD.getOperand(2)};
3781     return MDNode::get(Context, Elts2);
3782   }
3783   // Create a MDNode <MD, MD, offset 0>
3784   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3785                                     Type::getInt64Ty(Context)))};
3786   return MDNode::get(Context, Elts);
3787 }
3788 
3789 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3790                                       Instruction *&Temp) {
3791   if (Opc != Instruction::BitCast)
3792     return nullptr;
3793 
3794   Temp = nullptr;
3795   Type *SrcTy = V->getType();
3796   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3797       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3798     LLVMContext &Context = V->getContext();
3799 
3800     // We have no information about target data layout, so we assume that
3801     // the maximum pointer size is 64bit.
3802     Type *MidTy = Type::getInt64Ty(Context);
3803     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3804 
3805     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3806   }
3807 
3808   return nullptr;
3809 }
3810 
3811 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3812   if (Opc != Instruction::BitCast)
3813     return nullptr;
3814 
3815   Type *SrcTy = C->getType();
3816   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3817       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3818     LLVMContext &Context = C->getContext();
3819 
3820     // We have no information about target data layout, so we assume that
3821     // the maximum pointer size is 64bit.
3822     Type *MidTy = Type::getInt64Ty(Context);
3823 
3824     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3825                                      DestTy);
3826   }
3827 
3828   return nullptr;
3829 }
3830 
3831 /// Check the debug info version number, if it is out-dated, drop the debug
3832 /// info. Return true if module is modified.
3833 bool llvm::UpgradeDebugInfo(Module &M) {
3834   unsigned Version = getDebugMetadataVersionFromModule(M);
3835   if (Version == DEBUG_METADATA_VERSION) {
3836     bool BrokenDebugInfo = false;
3837     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3838       report_fatal_error("Broken module found, compilation aborted!");
3839     if (!BrokenDebugInfo)
3840       // Everything is ok.
3841       return false;
3842     else {
3843       // Diagnose malformed debug info.
3844       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3845       M.getContext().diagnose(Diag);
3846     }
3847   }
3848   bool Modified = StripDebugInfo(M);
3849   if (Modified && Version != DEBUG_METADATA_VERSION) {
3850     // Diagnose a version mismatch.
3851     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3852     M.getContext().diagnose(DiagVersion);
3853   }
3854   return Modified;
3855 }
3856 
3857 /// This checks for objc retain release marker which should be upgraded. It
3858 /// returns true if module is modified.
3859 static bool UpgradeRetainReleaseMarker(Module &M) {
3860   bool Changed = false;
3861   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3862   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3863   if (ModRetainReleaseMarker) {
3864     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3865     if (Op) {
3866       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3867       if (ID) {
3868         SmallVector<StringRef, 4> ValueComp;
3869         ID->getString().split(ValueComp, "#");
3870         if (ValueComp.size() == 2) {
3871           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3872           ID = MDString::get(M.getContext(), NewValue);
3873         }
3874         M.addModuleFlag(Module::Error, MarkerKey, ID);
3875         M.eraseNamedMetadata(ModRetainReleaseMarker);
3876         Changed = true;
3877       }
3878     }
3879   }
3880   return Changed;
3881 }
3882 
/// Upgrade direct calls to ObjC ARC runtime entry points (objc_retain,
/// objc_release, ...) into calls to the corresponding llvm.objc.* intrinsics.
/// The bulk of the upgrade only runs when UpgradeRetainReleaseMarker reports
/// that the module still used the old marker form.
void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    // Walk all users of the old function. The iterator is advanced before
    // the user is inspected because the call may be erased below.
    for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
      CallInst *CI = dyn_cast<CallInst>(*I++);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      // Leave the call untouched if any argument could not be cast.
      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->setName(CI->getName());

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the old declaration once every rewritable call has been replaced.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!UpgradeRetainReleaseMarker(M))
    return;

  // Table mapping each old ARC runtime function name to its intrinsic.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
4003 
4004 bool llvm::UpgradeModuleFlags(Module &M) {
4005   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4006   if (!ModFlags)
4007     return false;
4008 
4009   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4010   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4011     MDNode *Op = ModFlags->getOperand(I);
4012     if (Op->getNumOperands() != 3)
4013       continue;
4014     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4015     if (!ID)
4016       continue;
4017     if (ID->getString() == "Objective-C Image Info Version")
4018       HasObjCFlag = true;
4019     if (ID->getString() == "Objective-C Class Properties")
4020       HasClassProperties = true;
4021     // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
4022     // field was Error and now they are Max.
4023     if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
4024       if (auto *Behavior =
4025               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
4026         if (Behavior->getLimitedValue() == Module::Error) {
4027           Type *Int32Ty = Type::getInt32Ty(M.getContext());
4028           Metadata *Ops[3] = {
4029               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
4030               MDString::get(M.getContext(), ID->getString()),
4031               Op->getOperand(2)};
4032           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4033           Changed = true;
4034         }
4035       }
4036     }
4037     // Upgrade Objective-C Image Info Section. Removed the whitespce in the
4038     // section name so that llvm-lto will not complain about mismatching
4039     // module flags that is functionally the same.
4040     if (ID->getString() == "Objective-C Image Info Section") {
4041       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
4042         SmallVector<StringRef, 4> ValueComp;
4043         Value->getString().split(ValueComp, " ");
4044         if (ValueComp.size() != 1) {
4045           std::string NewValue;
4046           for (auto &S : ValueComp)
4047             NewValue += S.str();
4048           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
4049                               MDString::get(M.getContext(), NewValue)};
4050           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4051           Changed = true;
4052         }
4053       }
4054     }
4055   }
4056 
4057   // "Objective-C Class Properties" is recently added for Objective-C. We
4058   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
4059   // flag of value 0, so we can correclty downgrade this flag when trying to
4060   // link an ObjC bitcode without this module flag with an ObjC bitcode with
4061   // this module flag.
4062   if (HasObjCFlag && !HasClassProperties) {
4063     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
4064                     (uint32_t)0);
4065     Changed = true;
4066   }
4067 
4068   return Changed;
4069 }
4070 
4071 void llvm::UpgradeSectionAttributes(Module &M) {
4072   auto TrimSpaces = [](StringRef Section) -> std::string {
4073     SmallVector<StringRef, 5> Components;
4074     Section.split(Components, ',');
4075 
4076     SmallString<32> Buffer;
4077     raw_svector_ostream OS(Buffer);
4078 
4079     for (auto Component : Components)
4080       OS << ',' << Component.trim();
4081 
4082     return std::string(OS.str().substr(1));
4083   };
4084 
4085   for (auto &GV : M.globals()) {
4086     if (!GV.hasSection())
4087       continue;
4088 
4089     StringRef Section = GV.getSection();
4090 
4091     if (!Section.startswith("__DATA, __objc_catlist"))
4092       continue;
4093 
4094     // __DATA, __objc_catlist, regular, no_dead_strip
4095     // __DATA,__objc_catlist,regular,no_dead_strip
4096     GV.setSection(TrimSpaces(Section));
4097   }
4098 }
4099 
4100 static bool isOldLoopArgument(Metadata *MD) {
4101   auto *T = dyn_cast_or_null<MDTuple>(MD);
4102   if (!T)
4103     return false;
4104   if (T->getNumOperands() < 1)
4105     return false;
4106   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
4107   if (!S)
4108     return false;
4109   return S->getString().startswith("llvm.vectorizer.");
4110 }
4111 
4112 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
4113   StringRef OldPrefix = "llvm.vectorizer.";
4114   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
4115 
4116   if (OldTag == "llvm.vectorizer.unroll")
4117     return MDString::get(C, "llvm.loop.interleave.count");
4118 
4119   return MDString::get(
4120       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
4121              .str());
4122 }
4123 
4124 static Metadata *upgradeLoopArgument(Metadata *MD) {
4125   auto *T = dyn_cast_or_null<MDTuple>(MD);
4126   if (!T)
4127     return MD;
4128   if (T->getNumOperands() < 1)
4129     return MD;
4130   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
4131   if (!OldTag)
4132     return MD;
4133   if (!OldTag->getString().startswith("llvm.vectorizer."))
4134     return MD;
4135 
4136   // This has an old tag.  Upgrade it.
4137   SmallVector<Metadata *, 8> Ops;
4138   Ops.reserve(T->getNumOperands());
4139   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
4140   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
4141     Ops.push_back(T->getOperand(I));
4142 
4143   return MDTuple::get(T->getContext(), Ops);
4144 }
4145 
4146 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
4147   auto *T = dyn_cast<MDTuple>(&N);
4148   if (!T)
4149     return &N;
4150 
4151   if (none_of(T->operands(), isOldLoopArgument))
4152     return &N;
4153 
4154   SmallVector<Metadata *, 8> Ops;
4155   Ops.reserve(T->getNumOperands());
4156   for (Metadata *MD : T->operands())
4157     Ops.push_back(upgradeLoopArgument(MD));
4158 
4159   return MDTuple::get(T->getContext(), Ops);
4160 }
4161 
4162 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
4163   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
4164 
4165   // If X86, and the datalayout matches the expected format, add pointer size
4166   // address spaces to the datalayout.
4167   if (!Triple(TT).isX86() || DL.contains(AddrSpaces))
4168     return std::string(DL);
4169 
4170   SmallVector<StringRef, 4> Groups;
4171   Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
4172   if (!R.match(DL, &Groups))
4173     return std::string(DL);
4174 
4175   SmallString<1024> Buf;
4176   std::string Res = (Groups[1] + AddrSpaces + Groups[3]).toStringRef(Buf).str();
4177   return Res;
4178 }
4179 
4180 void llvm::UpgradeFramePointerAttributes(AttrBuilder &B) {
4181   StringRef FramePointer;
4182   if (B.contains("no-frame-pointer-elim")) {
4183     // The value can be "true" or "false".
4184     for (const auto &I : B.td_attrs())
4185       if (I.first == "no-frame-pointer-elim")
4186         FramePointer = I.second == "true" ? "all" : "none";
4187     B.removeAttribute("no-frame-pointer-elim");
4188   }
4189   if (B.contains("no-frame-pointer-elim-non-leaf")) {
4190     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
4191     if (FramePointer != "all")
4192       FramePointer = "non-leaf";
4193     B.removeAttribute("no-frame-pointer-elim-non-leaf");
4194   }
4195 
4196   if (!FramePointer.empty())
4197     B.addAttribute("frame-pointer", FramePointer);
4198 }
4199