1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the auto-upgrade helper functions.
10 // This is where deprecated IR intrinsics and other IR features are updated to
11 // current specifications.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/IR/AutoUpgrade.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DIBuilder.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/DiagnosticInfo.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instruction.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/Verifier.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/Regex.h"
30 #include <cstring>
31 using namespace llvm;
32 
33 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
34 
35 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
36 // changed their type from v4f32 to v2i64.
37 static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
38                                   Function *&NewFn) {
39   // Check whether this is an old version of the function, which received
40   // v4f32 arguments.
41   Type *Arg0Type = F->getFunctionType()->getParamType(0);
42   if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43     return false;
44 
45   // Yes, it's old, replace it with new version.
46   rename(F);
47   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
48   return true;
49 }
50 
51 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
52 // arguments have changed their type from i32 to i8.
53 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54                                              Function *&NewFn) {
55   // Check that the last argument is an i32.
56   Type *LastArgType = F->getFunctionType()->getParamType(
57      F->getFunctionType()->getNumParams() - 1);
58   if (!LastArgType->isIntegerTy(32))
59     return false;
60 
61   // Move this function aside and map down.
62   rename(F);
63   NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
64   return true;
65 }
66 
// Returns true if \p Name (with the "x86." prefix already stripped by the
// caller) identifies an X86 intrinsic that is upgraded without a replacement
// declaration — the caller sets NewFn to nullptr and the call sites are
// rewritten instead.
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name=="ssse3.pabs.b.128" || // Added in 6.0
      Name=="ssse3.pabs.w.128" || // Added in 6.0
      Name=="ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmaddsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") ||  // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") ||  // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}
393 
394 static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
395                                         Function *&NewFn) {
396   // Only handle intrinsics that start with "x86.".
397   if (!Name.startswith("x86."))
398     return false;
399   // Remove "x86." prefix.
400   Name = Name.substr(4);
401 
402   if (ShouldUpgradeX86Intrinsic(F, Name)) {
403     NewFn = nullptr;
404     return true;
405   }
406 
407   if (Name == "rdtscp") { // Added in 8.0
408     // If this intrinsic has 0 operands, it's the new version.
409     if (F->getFunctionType()->getNumParams() == 0)
410       return false;
411 
412     rename(F);
413     NewFn = Intrinsic::getDeclaration(F->getParent(),
414                                       Intrinsic::x86_rdtscp);
415     return true;
416   }
417 
418   // SSE4.1 ptest functions may have an old signature.
419   if (Name.startswith("sse41.ptest")) { // Added in 3.2
420     if (Name.substr(11) == "c")
421       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
422     if (Name.substr(11) == "z")
423       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
424     if (Name.substr(11) == "nzc")
425       return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
426   }
427   // Several blend and other instructions with masks used the wrong number of
428   // bits.
429   if (Name == "sse41.insertps") // Added in 3.6
430     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
431                                             NewFn);
432   if (Name == "sse41.dppd") // Added in 3.6
433     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
434                                             NewFn);
435   if (Name == "sse41.dpps") // Added in 3.6
436     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
437                                             NewFn);
438   if (Name == "sse41.mpsadbw") // Added in 3.6
439     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
440                                             NewFn);
441   if (Name == "avx.dp.ps.256") // Added in 3.6
442     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
443                                             NewFn);
444   if (Name == "avx2.mpsadbw") // Added in 3.6
445     return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
446                                             NewFn);
447 
448   // frcz.ss/sd may need to have an argument dropped. Added in 3.2
449   if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
450     rename(F);
451     NewFn = Intrinsic::getDeclaration(F->getParent(),
452                                       Intrinsic::x86_xop_vfrcz_ss);
453     return true;
454   }
455   if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
456     rename(F);
457     NewFn = Intrinsic::getDeclaration(F->getParent(),
458                                       Intrinsic::x86_xop_vfrcz_sd);
459     return true;
460   }
461   // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
462   if (Name.startswith("xop.vpermil2")) { // Added in 3.9
463     auto Idx = F->getFunctionType()->getParamType(2);
464     if (Idx->isFPOrFPVectorTy()) {
465       rename(F);
466       unsigned IdxSize = Idx->getPrimitiveSizeInBits();
467       unsigned EltSize = Idx->getScalarSizeInBits();
468       Intrinsic::ID Permil2ID;
469       if (EltSize == 64 && IdxSize == 128)
470         Permil2ID = Intrinsic::x86_xop_vpermil2pd;
471       else if (EltSize == 32 && IdxSize == 128)
472         Permil2ID = Intrinsic::x86_xop_vpermil2ps;
473       else if (EltSize == 64 && IdxSize == 256)
474         Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
475       else
476         Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
477       NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
478       return true;
479     }
480   }
481 
482   if (Name == "seh.recoverfp") {
483     NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
484     return true;
485   }
486 
487   return false;
488 }
489 
490 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
491   assert(F && "Illegal to upgrade a non-existent Function.");
492 
493   // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
494   if (F->getName() == "clang.arc.use") {
495     NewFn = nullptr;
496     return true;
497   }
498 
499   // Quickly eliminate it, if it's not a candidate.
500   StringRef Name = F->getName();
501   if (Name.size() <= 8 || !Name.startswith("llvm."))
502     return false;
503   Name = Name.substr(5); // Strip off "llvm."
504 
505   switch (Name[0]) {
506   default: break;
507   case 'a': {
508     if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
509       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
510                                         F->arg_begin()->getType());
511       return true;
512     }
513     if (Name.startswith("arm.neon.vclz")) {
514       Type* args[2] = {
515         F->arg_begin()->getType(),
516         Type::getInt1Ty(F->getContext())
517       };
518       // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
519       // the end of the name. Change name from llvm.arm.neon.vclz.* to
520       //  llvm.ctlz.*
521       FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
522       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
523                                "llvm.ctlz." + Name.substr(14), F->getParent());
524       return true;
525     }
526     if (Name.startswith("arm.neon.vcnt")) {
527       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
528                                         F->arg_begin()->getType());
529       return true;
530     }
531     Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
532     if (vldRegex.match(Name)) {
533       auto fArgs = F->getFunctionType()->params();
534       SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
535       // Can't use Intrinsic::getDeclaration here as the return types might
536       // then only be structurally equal.
537       FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
538       NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
539                                "llvm." + Name + ".p0i8", F->getParent());
540       return true;
541     }
542     Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
543     if (vstRegex.match(Name)) {
544       static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
545                                                 Intrinsic::arm_neon_vst2,
546                                                 Intrinsic::arm_neon_vst3,
547                                                 Intrinsic::arm_neon_vst4};
548 
549       static const Intrinsic::ID StoreLaneInts[] = {
550         Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
551         Intrinsic::arm_neon_vst4lane
552       };
553 
554       auto fArgs = F->getFunctionType()->params();
555       Type *Tys[] = {fArgs[0], fArgs[1]};
556       if (Name.find("lane") == StringRef::npos)
557         NewFn = Intrinsic::getDeclaration(F->getParent(),
558                                           StoreInts[fArgs.size() - 3], Tys);
559       else
560         NewFn = Intrinsic::getDeclaration(F->getParent(),
561                                           StoreLaneInts[fArgs.size() - 5], Tys);
562       return true;
563     }
564     if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
565       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
566       return true;
567     }
568     if (Name.startswith("aarch64.neon.addp")) {
569       if (F->arg_size() != 2)
570         break; // Invalid IR.
571       auto fArgs = F->getFunctionType()->params();
572       VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
573       if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
574         NewFn = Intrinsic::getDeclaration(F->getParent(),
575                                           Intrinsic::aarch64_neon_faddp, fArgs);
576         return true;
577       }
578     }
579     break;
580   }
581 
582   case 'c': {
583     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
584       rename(F);
585       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
586                                         F->arg_begin()->getType());
587       return true;
588     }
589     if (Name.startswith("cttz.") && F->arg_size() == 1) {
590       rename(F);
591       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
592                                         F->arg_begin()->getType());
593       return true;
594     }
595     break;
596   }
597   case 'd': {
598     if (Name == "dbg.value" && F->arg_size() == 4) {
599       rename(F);
600       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
601       return true;
602     }
603     break;
604   }
605   case 'i':
606   case 'l': {
607     bool IsLifetimeStart = Name.startswith("lifetime.start");
608     if (IsLifetimeStart || Name.startswith("invariant.start")) {
609       Intrinsic::ID ID = IsLifetimeStart ?
610         Intrinsic::lifetime_start : Intrinsic::invariant_start;
611       auto Args = F->getFunctionType()->params();
612       Type* ObjectPtr[1] = {Args[1]};
613       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
614         rename(F);
615         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
616         return true;
617       }
618     }
619 
620     bool IsLifetimeEnd = Name.startswith("lifetime.end");
621     if (IsLifetimeEnd || Name.startswith("invariant.end")) {
622       Intrinsic::ID ID = IsLifetimeEnd ?
623         Intrinsic::lifetime_end : Intrinsic::invariant_end;
624 
625       auto Args = F->getFunctionType()->params();
626       Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
627       if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
628         rename(F);
629         NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
630         return true;
631       }
632     }
633     if (Name.startswith("invariant.group.barrier")) {
634       // Rename invariant.group.barrier to launder.invariant.group
635       auto Args = F->getFunctionType()->params();
636       Type* ObjectPtr[1] = {Args[0]};
637       rename(F);
638       NewFn = Intrinsic::getDeclaration(F->getParent(),
639           Intrinsic::launder_invariant_group, ObjectPtr);
640       return true;
641 
642     }
643 
644     break;
645   }
646   case 'm': {
647     if (Name.startswith("masked.load.")) {
648       Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
649       if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
650         rename(F);
651         NewFn = Intrinsic::getDeclaration(F->getParent(),
652                                           Intrinsic::masked_load,
653                                           Tys);
654         return true;
655       }
656     }
657     if (Name.startswith("masked.store.")) {
658       auto Args = F->getFunctionType()->params();
659       Type *Tys[] = { Args[0], Args[1] };
660       if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
661         rename(F);
662         NewFn = Intrinsic::getDeclaration(F->getParent(),
663                                           Intrinsic::masked_store,
664                                           Tys);
665         return true;
666       }
667     }
668     // Renaming gather/scatter intrinsics with no address space overloading
669     // to the new overload which includes an address space
670     if (Name.startswith("masked.gather.")) {
671       Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
672       if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
673         rename(F);
674         NewFn = Intrinsic::getDeclaration(F->getParent(),
675                                           Intrinsic::masked_gather, Tys);
676         return true;
677       }
678     }
679     if (Name.startswith("masked.scatter.")) {
680       auto Args = F->getFunctionType()->params();
681       Type *Tys[] = {Args[0], Args[1]};
682       if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
683         rename(F);
684         NewFn = Intrinsic::getDeclaration(F->getParent(),
685                                           Intrinsic::masked_scatter, Tys);
686         return true;
687       }
688     }
689     // Updating the memory intrinsics (memcpy/memmove/memset) that have an
690     // alignment parameter to embedding the alignment as an attribute of
691     // the pointer args.
692     if (Name.startswith("memcpy.") && F->arg_size() == 5) {
693       rename(F);
694       // Get the types of dest, src, and len
695       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
696       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
697                                         ParamTypes);
698       return true;
699     }
700     if (Name.startswith("memmove.") && F->arg_size() == 5) {
701       rename(F);
702       // Get the types of dest, src, and len
703       ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
704       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
705                                         ParamTypes);
706       return true;
707     }
708     if (Name.startswith("memset.") && F->arg_size() == 5) {
709       rename(F);
710       // Get the types of dest, and len
711       const auto *FT = F->getFunctionType();
712       Type *ParamTypes[2] = {
713           FT->getParamType(0), // Dest
714           FT->getParamType(2)  // len
715       };
716       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
717                                         ParamTypes);
718       return true;
719     }
720     break;
721   }
722   case 'n': {
723     if (Name.startswith("nvvm.")) {
724       Name = Name.substr(5);
725 
726       // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
727       Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
728                               .Cases("brev32", "brev64", Intrinsic::bitreverse)
729                               .Case("clz.i", Intrinsic::ctlz)
730                               .Case("popc.i", Intrinsic::ctpop)
731                               .Default(Intrinsic::not_intrinsic);
732       if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
733         NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
734                                           {F->getReturnType()});
735         return true;
736       }
737 
738       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
739       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
740       //
741       // TODO: We could add lohi.i2d.
742       bool Expand = StringSwitch<bool>(Name)
743                         .Cases("abs.i", "abs.ll", true)
744                         .Cases("clz.ll", "popc.ll", "h2f", true)
745                         .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
746                         .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
747                         .Default(false);
748       if (Expand) {
749         NewFn = nullptr;
750         return true;
751       }
752     }
753     break;
754   }
755   case 'o':
756     // We only need to change the name to match the mangling including the
757     // address space.
758     if (Name.startswith("objectsize.")) {
759       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
760       if (F->arg_size() == 2 || F->arg_size() == 3 ||
761           F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
762         rename(F);
763         NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
764                                           Tys);
765         return true;
766       }
767     }
768     break;
769 
770   case 's':
771     if (Name == "stackprotectorcheck") {
772       NewFn = nullptr;
773       return true;
774     }
775     break;
776 
777   case 'x':
778     if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
779       return true;
780   }
781   // Remangle our intrinsic since we upgrade the mangling
782   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
783   if (Result != None) {
784     NewFn = Result.getValue();
785     return true;
786   }
787 
788   //  This may not belong here. This function is effectively being overloaded
789   //  to both detect an intrinsic which needs upgrading, and to provide the
790   //  upgraded form of the intrinsic. We should perhaps have two separate
791   //  functions for this.
792   return false;
793 }
794 
795 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
796   NewFn = nullptr;
797   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
798   assert(F != NewFn && "Intrinsic function upgraded to the same function");
799 
800   // Upgrade intrinsic attributes.  This does not change the function.
801   if (NewFn)
802     F = NewFn;
803   if (Intrinsic::ID id = F->getIntrinsicID())
804     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
805   return Upgraded;
806 }
807 
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Only llvm.global_ctors / llvm.global_dtors are upgraded here, and only
  // when they carry an initializer we can rewrite.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  // Two-field entries are the legacy form; the current form has a third
  // pointer field, so anything else is left untouched.
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: the old two fields plus a trailing i8*.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getInt8PtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    // Copy both original fields and append a null i8* for the new field.
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(
        EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
        Constant::getNullValue(IRB.getInt8PtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  // NOTE: this GlobalVariable constructor takes no Module, so the replacement
  // is not inserted anywhere; the caller is expected to splice it in for GV.
  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
838 
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
                                         Value *Op, unsigned Shift) {
  Type *ResultTy = Op->getType();
  // Work in bytes: 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices >= NumElts select from Op (second shuffle operand);
        // smaller indices select zeroes from Res (first operand).
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
872 
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  Type *ResultTy = Op->getType();
  // Work in bytes: 8 bytes per 64-bit element.
  unsigned NumElts = ResultTy->getVectorNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    uint32_t Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        // Indices < 16 (per lane) select from Op (first shuffle operand);
        // indices pushed past the lane end select zeroes from Res.
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
906 
907 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
908                             unsigned NumElts) {
909   llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
910                              cast<IntegerType>(Mask->getType())->getBitWidth());
911   Mask = Builder.CreateBitCast(Mask, MaskTy);
912 
913   // If we have less than 8 elements, then the starting mask was an i8 and
914   // we need to extract down to the right number of elements.
915   if (NumElts < 8) {
916     uint32_t Indices[4];
917     for (unsigned i = 0; i != NumElts; ++i)
918       Indices[i] = i;
919     Mask = Builder.CreateShuffleVector(Mask, Mask,
920                                        makeArrayRef(Indices, NumElts),
921                                        "extract");
922   }
923 
924   return Mask;
925 }
926 
927 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
928                             Value *Op0, Value *Op1) {
929   // If the mask is all ones just emit the first operation.
930   if (const auto *C = dyn_cast<Constant>(Mask))
931     if (C->isAllOnesValue())
932       return Op0;
933 
934   Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
935   return Builder.CreateSelect(Mask, Op0, Op1);
936 }
937 
938 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
939                                   Value *Op0, Value *Op1) {
940   // If the mask is all ones just emit the first operation.
941   if (const auto *C = dyn_cast<Constant>(Mask))
942     if (C->isAllOnesValue())
943       return Op0;
944 
945   llvm::VectorType *MaskTy =
946     llvm::VectorType::get(Builder.getInt1Ty(),
947                           Mask->getType()->getIntegerBitWidth());
948   Mask = Builder.CreateBitCast(Mask, MaskTy);
949   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
950   return Builder.CreateSelect(Mask, Op0, Op1);
951 }
952 
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  // The shift amount must be a compile-time constant.
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = Op0->getType()->getVectorNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  uint32_t Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      // Indices >= NumElts select from Op0 (second shuffle operand below);
      // VALIGN has no lanes, so it wraps within the concatenated pair.
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16; // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  // Shuffle operand order is (Op1, Op0): low indices read Op1.
  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
                                             makeArrayRef(Indices, NumElts),
                                             "palignr");

  // Apply the merge mask, falling back to Passthru for masked-off lanes.
  return EmitX86Select(Builder, Mask, Align, Passthru);
}
1001 
// Upgrade masked VPERMT2/VPERMI2 permute intrinsics to the unified
// vpermi2var intrinsics plus an explicit mask select.
// ZeroMask: use a zero vector (maskz form) instead of operand 1 as passthru.
// IndexForm: operands are already in vpermi2var order; otherwise operands
// 0 and 1 are swapped before the call.
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  // Pick the vpermi2var variant matching vector width, element width, and
  // int/FP element type. 8/16-bit elements have no FP forms.
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                                Args);
  // Merge form keeps operand 1 (bitcast to the result type, since it may be
  // the integer index vector) where the mask is off; zero form uses zeroes.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
1062 
1063 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1064                                             bool IsSigned, bool IsAddition) {
1065   Type *Ty = CI.getType();
1066   Value *Op0 = CI.getOperand(0);
1067   Value *Op1 = CI.getOperand(1);
1068 
1069   Intrinsic::ID IID =
1070       IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1071                : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1072   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1073   Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1074 
1075   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1076     Value *VecSrc = CI.getOperand(2);
1077     Value *Mask = CI.getOperand(3);
1078     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1079   }
1080   return Res;
1081 }
1082 
1083 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1084                                bool IsRotateRight) {
1085   Type *Ty = CI.getType();
1086   Value *Src = CI.getArgOperand(0);
1087   Value *Amt = CI.getArgOperand(1);
1088 
1089   // Amount may be scalar immediate, in which case create a splat vector.
1090   // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1091   // we only care about the lowest log2 bits anyway.
1092   if (Amt->getType() != Ty) {
1093     unsigned NumElts = Ty->getVectorNumElements();
1094     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1095     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1096   }
1097 
1098   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1099   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1100   Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1101 
1102   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1103     Value *VecSrc = CI.getOperand(2);
1104     Value *Mask = CI.getOperand(3);
1105     Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1106   }
1107   return Res;
1108 }
1109 
1110 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1111                               bool IsSigned) {
1112   Type *Ty = CI.getType();
1113   Value *LHS = CI.getArgOperand(0);
1114   Value *RHS = CI.getArgOperand(1);
1115 
1116   CmpInst::Predicate Pred;
1117   switch (Imm) {
1118   case 0x0:
1119     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1120     break;
1121   case 0x1:
1122     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1123     break;
1124   case 0x2:
1125     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1126     break;
1127   case 0x3:
1128     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1129     break;
1130   case 0x4:
1131     Pred = ICmpInst::ICMP_EQ;
1132     break;
1133   case 0x5:
1134     Pred = ICmpInst::ICMP_NE;
1135     break;
1136   case 0x6:
1137     return Constant::getNullValue(Ty); // FALSE
1138   case 0x7:
1139     return Constant::getAllOnesValue(Ty); // TRUE
1140   default:
1141     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1142   }
1143 
1144   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1145   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1146   return Ext;
1147 }
1148 
// Upgrade x86 concat-shift (double-shift) intrinsics to funnel shifts,
// applying a merge or zero mask when the call carries one.
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  // fshr takes its data operands in the opposite order.
  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = Ty->getVectorNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});

  unsigned NumArgs = CI.getNumArgOperands();
  if (NumArgs >= 4) { // For masked intrinsics.
    // 5 args: explicit passthru at operand 3. 4 args: a zero vector for the
    // zeroing form, otherwise operand 0 serves as the passthru.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
                    ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
                                   CI.getArgOperand(0);
    // The mask is always the last operand.
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}
1182 
1183 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
1184                                  Value *Ptr, Value *Data, Value *Mask,
1185                                  bool Aligned) {
1186   // Cast the pointer to the right type.
1187   Ptr = Builder.CreateBitCast(Ptr,
1188                               llvm::PointerType::getUnqual(Data->getType()));
1189   unsigned Align =
1190     Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1191 
1192   // If the mask is all ones just emit a regular store.
1193   if (const auto *C = dyn_cast<Constant>(Mask))
1194     if (C->isAllOnesValue())
1195       return Builder.CreateAlignedStore(Data, Ptr, Align);
1196 
1197   // Convert the mask from an integer type to a vector of i1.
1198   unsigned NumElts = Data->getType()->getVectorNumElements();
1199   Mask = getX86MaskVec(Builder, Mask, NumElts);
1200   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1201 }
1202 
// Upgrade an x86 masked-load intrinsic to a generic masked load (or a plain
// load when the mask is a constant all-ones). Masked-off lanes take their
// value from Passthru.
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
                                Value *Ptr, Value *Passthru, Value *Mask,
                                bool Aligned) {
  Type *ValTy = Passthru->getType();
  // Cast the pointer to the right type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  // Aligned variants assume the vector's natural alignment; others use 1.
  unsigned Align =
    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Align);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = Passthru->getType()->getVectorNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
}
1222 
1223 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1224   Value *Op0 = CI.getArgOperand(0);
1225   llvm::Type *Ty = Op0->getType();
1226   Value *Zero = llvm::Constant::getNullValue(Ty);
1227   Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1228   Value *Neg = Builder.CreateNeg(Op0);
1229   Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1230 
1231   if (CI.getNumArgOperands() == 3)
1232     Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1233 
1234   return Res;
1235 }
1236 
1237 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
1238                                ICmpInst::Predicate Pred) {
1239   Value *Op0 = CI.getArgOperand(0);
1240   Value *Op1 = CI.getArgOperand(1);
1241   Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1242   Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1243 
1244   if (CI.getNumArgOperands() == 4)
1245     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1246 
1247   return Res;
1248 }
1249 
1250 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1251   Type *Ty = CI.getType();
1252 
1253   // Arguments have a vXi32 type so cast to vXi64.
1254   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1255   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1256 
1257   if (IsSigned) {
1258     // Shift left then arithmetic shift right.
1259     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1260     LHS = Builder.CreateShl(LHS, ShiftAmt);
1261     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1262     RHS = Builder.CreateShl(RHS, ShiftAmt);
1263     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1264   } else {
1265     // Clear the upper bits.
1266     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1267     LHS = Builder.CreateAnd(LHS, Mask);
1268     RHS = Builder.CreateAnd(RHS, Mask);
1269   }
1270 
1271   Value *Res = Builder.CreateMul(LHS, RHS);
1272 
1273   if (CI.getNumArgOperands() == 4)
1274     Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1275 
1276   return Res;
1277 }
1278 
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = Vec->getType()->getVectorNumElements();
  if (Mask) {
    // AND in the mask unless it is a constant all-ones (a no-op).
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  // Pad narrow results out to 8 lanes so the bitcast below yields an i8.
  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    // Padding lanes (indices >= NumElts) select from the zero vector, the
    // shuffle's second operand.
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  // Collapse the i1 vector into a single integer of at least 8 bits.
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
1301 
// Upgrade masked x86 integer compare intrinsics. CC encodes the predicate:
// 3 and 7 are constant false/true; the rest map to icmp predicates, with
// Signed selecting the signed or unsigned ordered forms.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = Op0->getType()->getVectorNumElements();

  Value *Cmp;
  if (CC == 3) {
    // Always-false: an all-zero i1 vector.
    Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    // Always-true: an all-ones i1 vector.
    Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  // The write-mask is always the last operand; fold it in and widen the
  // result to at least an i8.
  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);

  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
1330 
1331 // Replace a masked intrinsic with an older unmasked intrinsic.
1332 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
1333                                     Intrinsic::ID IID) {
1334   Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1335   Value *Rep = Builder.CreateCall(Intrin,
1336                                  { CI.getArgOperand(0), CI.getArgOperand(1) });
1337   return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1338 }
1339 
1340 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
1341   Value* A = CI.getArgOperand(0);
1342   Value* B = CI.getArgOperand(1);
1343   Value* Src = CI.getArgOperand(2);
1344   Value* Mask = CI.getArgOperand(3);
1345 
1346   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1347   Value* Cmp = Builder.CreateIsNotNull(AndNode);
1348   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1349   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1350   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1351   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1352 }
1353 
1354 
1355 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1356   Value* Op = CI.getArgOperand(0);
1357   Type* ReturnOp = CI.getType();
1358   unsigned NumElts = CI.getType()->getVectorNumElements();
1359   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1360   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1361 }
1362 
// Replace intrinsic with unmasked version and a select.
//
// Handles "llvm.x86.avx512.mask.*" intrinsics for which an unmasked
// equivalent already exists: the masked call is rewritten as a call to the
// unmasked intrinsic followed by an x86-style select between its result and
// the passthru operand. Returns true and sets Rep on success; returns false
// (leaving Rep untouched) when Name is not one of the recognized operations.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallInst &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  // The replacement intrinsic ID is selected from the operation name plus
  // the bit width of the return vector and, where the name is ambiguous,
  // the element width (or element kind, e.g. float vs. int for permvar).
  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.startswith("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.startswith("permvar.")) {
    // permvar needs the element kind (float vs. integer) as well as the
    // widths to pick between the avx2 and avx512 variants.
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("conflict.")) {
    // Name[9] is the element-type suffix following "conflict." ('d' or 'q').
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.startswith("pavg.")) {
    // Name[5] is the element-type suffix following "pavg." ('b' or 'w').
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Call the unmasked intrinsic with every operand except the two trailing
  // ones (passthru and mask), which the unmasked form does not take.
  SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
                               CI.arg_operands().end());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                           Args);
  unsigned NumArgs = CI.getNumArgOperands();
  // Last operand is the mask, second-to-last the passthru value.
  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
1602 
1603 /// Upgrade comment in call to inline asm that represents an objc retain release
1604 /// marker.
1605 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
1606   size_t Pos;
1607   if (AsmStr->find("mov\tfp") == 0 &&
1608       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
1609       (Pos = AsmStr->find("# marker")) != std::string::npos) {
1610     AsmStr->replace(Pos, 1, ";");
1611   }
1612   return;
1613 }
1614 
1615 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1616 /// provided to seamlessly integrate with existing context.
1617 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1618   Function *F = CI->getCalledFunction();
1619   LLVMContext &C = CI->getContext();
1620   IRBuilder<> Builder(C);
1621   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1622 
1623   assert(F && "Intrinsic call is not direct?");
1624 
1625   if (!NewFn) {
1626     // Get the Function's name.
1627     StringRef Name = F->getName();
1628 
1629     // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
1630     // from upgrader because the optimizer now only recognizes intrinsics for
1631     // ARC runtime calls.
1632     if (Name == "clang.arc.use") {
1633       CI->eraseFromParent();
1634       return;
1635     }
1636 
1637     assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1638     Name = Name.substr(5);
1639 
1640     bool IsX86 = Name.startswith("x86.");
1641     if (IsX86)
1642       Name = Name.substr(4);
1643     bool IsNVVM = Name.startswith("nvvm.");
1644     if (IsNVVM)
1645       Name = Name.substr(5);
1646 
1647     if (IsX86 && Name.startswith("sse4a.movnt.")) {
1648       Module *M = F->getParent();
1649       SmallVector<Metadata *, 1> Elts;
1650       Elts.push_back(
1651           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1652       MDNode *Node = MDNode::get(C, Elts);
1653 
1654       Value *Arg0 = CI->getArgOperand(0);
1655       Value *Arg1 = CI->getArgOperand(1);
1656 
1657       // Nontemporal (unaligned) store of the 0'th element of the float/double
1658       // vector.
1659       Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1660       PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1661       Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1662       Value *Extract =
1663           Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1664 
1665       StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1666       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1667 
1668       // Remove intrinsic.
1669       CI->eraseFromParent();
1670       return;
1671     }
1672 
1673     if (IsX86 && (Name.startswith("avx.movnt.") ||
1674                   Name.startswith("avx512.storent."))) {
1675       Module *M = F->getParent();
1676       SmallVector<Metadata *, 1> Elts;
1677       Elts.push_back(
1678           ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
1679       MDNode *Node = MDNode::get(C, Elts);
1680 
1681       Value *Arg0 = CI->getArgOperand(0);
1682       Value *Arg1 = CI->getArgOperand(1);
1683 
1684       // Convert the type of the pointer to a pointer to the stored type.
1685       Value *BC = Builder.CreateBitCast(Arg0,
1686                                         PointerType::getUnqual(Arg1->getType()),
1687                                         "cast");
1688       VectorType *VTy = cast<VectorType>(Arg1->getType());
1689       StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1690                                                  VTy->getBitWidth() / 8);
1691       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1692 
1693       // Remove intrinsic.
1694       CI->eraseFromParent();
1695       return;
1696     }
1697 
1698     if (IsX86 && Name == "sse2.storel.dq") {
1699       Value *Arg0 = CI->getArgOperand(0);
1700       Value *Arg1 = CI->getArgOperand(1);
1701 
1702       Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1703       Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1704       Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1705       Value *BC = Builder.CreateBitCast(Arg0,
1706                                         PointerType::getUnqual(Elt->getType()),
1707                                         "cast");
1708       Builder.CreateAlignedStore(Elt, BC, 1);
1709 
1710       // Remove intrinsic.
1711       CI->eraseFromParent();
1712       return;
1713     }
1714 
1715     if (IsX86 && (Name.startswith("sse.storeu.") ||
1716                   Name.startswith("sse2.storeu.") ||
1717                   Name.startswith("avx.storeu."))) {
1718       Value *Arg0 = CI->getArgOperand(0);
1719       Value *Arg1 = CI->getArgOperand(1);
1720 
1721       Arg0 = Builder.CreateBitCast(Arg0,
1722                                    PointerType::getUnqual(Arg1->getType()),
1723                                    "cast");
1724       Builder.CreateAlignedStore(Arg1, Arg0, 1);
1725 
1726       // Remove intrinsic.
1727       CI->eraseFromParent();
1728       return;
1729     }
1730 
1731     if (IsX86 && Name == "avx512.mask.store.ss") {
1732       Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1733       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1734                          Mask, false);
1735 
1736       // Remove intrinsic.
1737       CI->eraseFromParent();
1738       return;
1739     }
1740 
1741     if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1742       // "avx512.mask.storeu." or "avx512.mask.store."
1743       bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1744       UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1745                          CI->getArgOperand(2), Aligned);
1746 
1747       // Remove intrinsic.
1748       CI->eraseFromParent();
1749       return;
1750     }
1751 
1752     Value *Rep;
1753     // Upgrade packed integer vector compare intrinsics to compare instructions.
1754     if (IsX86 && (Name.startswith("sse2.pcmp") ||
1755                   Name.startswith("avx2.pcmp"))) {
1756       // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1757       bool CmpEq = Name[9] == 'e';
1758       Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1759                                CI->getArgOperand(0), CI->getArgOperand(1));
1760       Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1761     } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1762       Type *ExtTy = Type::getInt32Ty(C);
1763       if (CI->getOperand(0)->getType()->isIntegerTy(8))
1764         ExtTy = Type::getInt64Ty(C);
1765       unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1766                          ExtTy->getPrimitiveSizeInBits();
1767       Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1768       Rep = Builder.CreateVectorSplat(NumElts, Rep);
1769     } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1770                          Name == "sse2.sqrt.sd")) {
1771       Value *Vec = CI->getArgOperand(0);
1772       Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1773       Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1774                                                  Intrinsic::sqrt, Elt0->getType());
1775       Elt0 = Builder.CreateCall(Intr, Elt0);
1776       Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1777     } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1778                          Name.startswith("sse2.sqrt.p") ||
1779                          Name.startswith("sse.sqrt.p"))) {
1780       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1781                                                          Intrinsic::sqrt,
1782                                                          CI->getType()),
1783                                {CI->getArgOperand(0)});
1784     } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1785       if (CI->getNumArgOperands() == 4 &&
1786           (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1787            cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1788         Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1789                                             : Intrinsic::x86_avx512_sqrt_pd_512;
1790 
1791         Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1792         Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1793                                                            IID), Args);
1794       } else {
1795         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1796                                                            Intrinsic::sqrt,
1797                                                            CI->getType()),
1798                                  {CI->getArgOperand(0)});
1799       }
1800       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1801                           CI->getArgOperand(1));
1802     } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1803                          Name.startswith("avx512.ptestnm"))) {
1804       Value *Op0 = CI->getArgOperand(0);
1805       Value *Op1 = CI->getArgOperand(1);
1806       Value *Mask = CI->getArgOperand(2);
1807       Rep = Builder.CreateAnd(Op0, Op1);
1808       llvm::Type *Ty = Op0->getType();
1809       Value *Zero = llvm::Constant::getNullValue(Ty);
1810       ICmpInst::Predicate Pred =
1811         Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1812       Rep = Builder.CreateICmp(Pred, Rep, Zero);
1813       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1814     } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1815       unsigned NumElts =
1816           CI->getArgOperand(1)->getType()->getVectorNumElements();
1817       Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1818       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1819                           CI->getArgOperand(1));
1820     } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1821       unsigned NumElts = CI->getType()->getScalarSizeInBits();
1822       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1823       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1824       uint32_t Indices[64];
1825       for (unsigned i = 0; i != NumElts; ++i)
1826         Indices[i] = i;
1827 
1828       // First extract half of each vector. This gives better codegen than
1829       // doing it in a single shuffle.
1830       LHS = Builder.CreateShuffleVector(LHS, LHS,
1831                                         makeArrayRef(Indices, NumElts / 2));
1832       RHS = Builder.CreateShuffleVector(RHS, RHS,
1833                                         makeArrayRef(Indices, NumElts / 2));
1834       // Concat the vectors.
1835       // NOTE: Operands have to be swapped to match intrinsic definition.
1836       Rep = Builder.CreateShuffleVector(RHS, LHS,
1837                                         makeArrayRef(Indices, NumElts));
1838       Rep = Builder.CreateBitCast(Rep, CI->getType());
1839     } else if (IsX86 && Name == "avx512.kand.w") {
1840       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1841       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1842       Rep = Builder.CreateAnd(LHS, RHS);
1843       Rep = Builder.CreateBitCast(Rep, CI->getType());
1844     } else if (IsX86 && Name == "avx512.kandn.w") {
1845       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1846       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1847       LHS = Builder.CreateNot(LHS);
1848       Rep = Builder.CreateAnd(LHS, RHS);
1849       Rep = Builder.CreateBitCast(Rep, CI->getType());
1850     } else if (IsX86 && Name == "avx512.kor.w") {
1851       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1852       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1853       Rep = Builder.CreateOr(LHS, RHS);
1854       Rep = Builder.CreateBitCast(Rep, CI->getType());
1855     } else if (IsX86 && Name == "avx512.kxor.w") {
1856       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1857       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1858       Rep = Builder.CreateXor(LHS, RHS);
1859       Rep = Builder.CreateBitCast(Rep, CI->getType());
1860     } else if (IsX86 && Name == "avx512.kxnor.w") {
1861       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1862       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1863       LHS = Builder.CreateNot(LHS);
1864       Rep = Builder.CreateXor(LHS, RHS);
1865       Rep = Builder.CreateBitCast(Rep, CI->getType());
1866     } else if (IsX86 && Name == "avx512.knot.w") {
1867       Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1868       Rep = Builder.CreateNot(Rep);
1869       Rep = Builder.CreateBitCast(Rep, CI->getType());
1870     } else if (IsX86 &&
1871                (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1872       Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1873       Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1874       Rep = Builder.CreateOr(LHS, RHS);
1875       Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1876       Value *C;
1877       if (Name[14] == 'c')
1878         C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1879       else
1880         C = ConstantInt::getNullValue(Builder.getInt16Ty());
1881       Rep = Builder.CreateICmpEQ(Rep, C);
1882       Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1883     } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1884                          Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1885                          Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1886                          Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1887       Type *I32Ty = Type::getInt32Ty(C);
1888       Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1889                                                  ConstantInt::get(I32Ty, 0));
1890       Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1891                                                  ConstantInt::get(I32Ty, 0));
1892       Value *EltOp;
1893       if (Name.contains(".add."))
1894         EltOp = Builder.CreateFAdd(Elt0, Elt1);
1895       else if (Name.contains(".sub."))
1896         EltOp = Builder.CreateFSub(Elt0, Elt1);
1897       else if (Name.contains(".mul."))
1898         EltOp = Builder.CreateFMul(Elt0, Elt1);
1899       else
1900         EltOp = Builder.CreateFDiv(Elt0, Elt1);
1901       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1902                                         ConstantInt::get(I32Ty, 0));
1903     } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1904       // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1905       bool CmpEq = Name[16] == 'e';
1906       Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1907     } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1908       Type *OpTy = CI->getArgOperand(0)->getType();
1909       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1910       Intrinsic::ID IID;
1911       switch (VecWidth) {
1912       default: llvm_unreachable("Unexpected intrinsic");
1913       case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1914       case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1915       case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1916       }
1917 
1918       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1919                                { CI->getOperand(0), CI->getArgOperand(1) });
1920       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1921     } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1922       Type *OpTy = CI->getArgOperand(0)->getType();
1923       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1924       unsigned EltWidth = OpTy->getScalarSizeInBits();
1925       Intrinsic::ID IID;
1926       if (VecWidth == 128 && EltWidth == 32)
1927         IID = Intrinsic::x86_avx512_fpclass_ps_128;
1928       else if (VecWidth == 256 && EltWidth == 32)
1929         IID = Intrinsic::x86_avx512_fpclass_ps_256;
1930       else if (VecWidth == 512 && EltWidth == 32)
1931         IID = Intrinsic::x86_avx512_fpclass_ps_512;
1932       else if (VecWidth == 128 && EltWidth == 64)
1933         IID = Intrinsic::x86_avx512_fpclass_pd_128;
1934       else if (VecWidth == 256 && EltWidth == 64)
1935         IID = Intrinsic::x86_avx512_fpclass_pd_256;
1936       else if (VecWidth == 512 && EltWidth == 64)
1937         IID = Intrinsic::x86_avx512_fpclass_pd_512;
1938       else
1939         llvm_unreachable("Unexpected intrinsic");
1940 
1941       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1942                                { CI->getOperand(0), CI->getArgOperand(1) });
1943       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1944     } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1945       Type *OpTy = CI->getArgOperand(0)->getType();
1946       unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1947       unsigned EltWidth = OpTy->getScalarSizeInBits();
1948       Intrinsic::ID IID;
1949       if (VecWidth == 128 && EltWidth == 32)
1950         IID = Intrinsic::x86_avx512_cmp_ps_128;
1951       else if (VecWidth == 256 && EltWidth == 32)
1952         IID = Intrinsic::x86_avx512_cmp_ps_256;
1953       else if (VecWidth == 512 && EltWidth == 32)
1954         IID = Intrinsic::x86_avx512_cmp_ps_512;
1955       else if (VecWidth == 128 && EltWidth == 64)
1956         IID = Intrinsic::x86_avx512_cmp_pd_128;
1957       else if (VecWidth == 256 && EltWidth == 64)
1958         IID = Intrinsic::x86_avx512_cmp_pd_256;
1959       else if (VecWidth == 512 && EltWidth == 64)
1960         IID = Intrinsic::x86_avx512_cmp_pd_512;
1961       else
1962         llvm_unreachable("Unexpected intrinsic");
1963 
1964       SmallVector<Value *, 4> Args;
1965       Args.push_back(CI->getArgOperand(0));
1966       Args.push_back(CI->getArgOperand(1));
1967       Args.push_back(CI->getArgOperand(2));
1968       if (CI->getNumArgOperands() == 5)
1969         Args.push_back(CI->getArgOperand(4));
1970 
1971       Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1972                                Args);
1973       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1974     } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1975                Name[16] != 'p') {
1976       // Integer compare intrinsics.
1977       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1978       Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1979     } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1980       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1981       Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1982     } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1983                          Name.startswith("avx512.cvtw2mask.") ||
1984                          Name.startswith("avx512.cvtd2mask.") ||
1985                          Name.startswith("avx512.cvtq2mask."))) {
1986       Value *Op = CI->getArgOperand(0);
1987       Value *Zero = llvm::Constant::getNullValue(Op->getType());
1988       Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1989       Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1990     } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1991                         Name == "ssse3.pabs.w.128" ||
1992                         Name == "ssse3.pabs.d.128" ||
1993                         Name.startswith("avx2.pabs") ||
1994                         Name.startswith("avx512.mask.pabs"))) {
1995       Rep = upgradeAbs(Builder, *CI);
1996     } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1997                          Name == "sse2.pmaxs.w" ||
1998                          Name == "sse41.pmaxsd" ||
1999                          Name.startswith("avx2.pmaxs") ||
2000                          Name.startswith("avx512.mask.pmaxs"))) {
2001       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
2002     } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2003                          Name == "sse41.pmaxuw" ||
2004                          Name == "sse41.pmaxud" ||
2005                          Name.startswith("avx2.pmaxu") ||
2006                          Name.startswith("avx512.mask.pmaxu"))) {
2007       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
2008     } else if (IsX86 && (Name == "sse41.pminsb" ||
2009                          Name == "sse2.pmins.w" ||
2010                          Name == "sse41.pminsd" ||
2011                          Name.startswith("avx2.pmins") ||
2012                          Name.startswith("avx512.mask.pmins"))) {
2013       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
2014     } else if (IsX86 && (Name == "sse2.pminu.b" ||
2015                          Name == "sse41.pminuw" ||
2016                          Name == "sse41.pminud" ||
2017                          Name.startswith("avx2.pminu") ||
2018                          Name.startswith("avx512.mask.pminu"))) {
2019       Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2020     } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2021                          Name == "avx2.pmulu.dq" ||
2022                          Name == "avx512.pmulu.dq.512" ||
2023                          Name.startswith("avx512.mask.pmulu.dq."))) {
2024       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2025     } else if (IsX86 && (Name == "sse41.pmuldq" ||
2026                          Name == "avx2.pmul.dq" ||
2027                          Name == "avx512.pmul.dq.512" ||
2028                          Name.startswith("avx512.mask.pmul.dq."))) {
2029       Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2030     } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2031                          Name == "sse2.cvtsi2sd" ||
2032                          Name == "sse.cvtsi642ss" ||
2033                          Name == "sse2.cvtsi642sd")) {
2034       Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
2035                                  CI->getType()->getVectorElementType());
2036       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2037     } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2038       Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2039                                  CI->getType()->getVectorElementType());
2040       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2041     } else if (IsX86 && Name == "sse2.cvtss2sd") {
2042       Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2043       Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2044       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2045     } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2046                          Name == "sse2.cvtdq2ps" ||
2047                          Name == "avx.cvtdq2.pd.256" ||
2048                          Name == "avx.cvtdq2.ps.256" ||
2049                          Name.startswith("avx512.mask.cvtdq2pd.") ||
2050                          Name.startswith("avx512.mask.cvtudq2pd.") ||
2051                          Name.startswith("avx512.mask.cvtdq2ps.") ||
2052                          Name.startswith("avx512.mask.cvtudq2ps.") ||
2053                          Name.startswith("avx512.mask.cvtqq2pd.") ||
2054                          Name.startswith("avx512.mask.cvtuqq2pd.") ||
2055                          Name == "avx512.mask.cvtqq2ps.256" ||
2056                          Name == "avx512.mask.cvtqq2ps.512" ||
2057                          Name == "avx512.mask.cvtuqq2ps.256" ||
2058                          Name == "avx512.mask.cvtuqq2ps.512" ||
2059                          Name == "sse2.cvtps2pd" ||
2060                          Name == "avx.cvt.ps2.pd.256" ||
2061                          Name == "avx512.mask.cvtps2pd.128" ||
2062                          Name == "avx512.mask.cvtps2pd.256")) {
2063       Type *DstTy = CI->getType();
2064       Rep = CI->getArgOperand(0);
2065       Type *SrcTy = Rep->getType();
2066 
2067       unsigned NumDstElts = DstTy->getVectorNumElements();
2068       if (NumDstElts < SrcTy->getVectorNumElements()) {
2069         assert(NumDstElts == 2 && "Unexpected vector size");
2070         uint32_t ShuffleMask[2] = { 0, 1 };
2071         Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
2072       }
2073 
2074       bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
2075       bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
2076       if (IsPS2PD)
2077         Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2078       else if (CI->getNumArgOperands() == 4 &&
2079                (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2080                 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2081         Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2082                                        : Intrinsic::x86_avx512_sitofp_round;
2083         Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2084                                                 { DstTy, SrcTy });
2085         Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2086       } else {
2087         Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2088                          : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2089       }
2090 
2091       if (CI->getNumArgOperands() >= 3)
2092         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2093                             CI->getArgOperand(1));
2094     } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
2095       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2096                               CI->getArgOperand(1), CI->getArgOperand(2),
2097                               /*Aligned*/false);
2098     } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
2099       Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
2100                               CI->getArgOperand(1),CI->getArgOperand(2),
2101                               /*Aligned*/true);
2102     } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2103       Type *ResultTy = CI->getType();
2104       Type *PtrTy = ResultTy->getVectorElementType();
2105 
2106       // Cast the pointer to element type.
2107       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2108                                          llvm::PointerType::getUnqual(PtrTy));
2109 
2110       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2111                                      ResultTy->getVectorNumElements());
2112 
2113       Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2114                                                 Intrinsic::masked_expandload,
2115                                                 ResultTy);
2116       Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2117     } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2118       Type *ResultTy = CI->getArgOperand(1)->getType();
2119       Type *PtrTy = ResultTy->getVectorElementType();
2120 
2121       // Cast the pointer to element type.
2122       Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2123                                          llvm::PointerType::getUnqual(PtrTy));
2124 
2125       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2126                                      ResultTy->getVectorNumElements());
2127 
2128       Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2129                                                 Intrinsic::masked_compressstore,
2130                                                 ResultTy);
2131       Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2132     } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2133                          Name.startswith("avx512.mask.expand."))) {
2134       Type *ResultTy = CI->getType();
2135 
2136       Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2137                                      ResultTy->getVectorNumElements());
2138 
2139       bool IsCompress = Name[12] == 'c';
2140       Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2141                                      : Intrinsic::x86_avx512_mask_expand;
2142       Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2143       Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2144                                        MaskVec });
2145     } else if (IsX86 && Name.startswith("xop.vpcom")) {
2146       bool IsSigned;
2147       if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
2148           Name.endswith("uq"))
2149         IsSigned = false;
2150       else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
2151                Name.endswith("q"))
2152         IsSigned = true;
2153       else
2154         llvm_unreachable("Unknown suffix");
2155 
2156       unsigned Imm;
2157       if (CI->getNumArgOperands() == 3) {
2158         Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2159       } else {
2160         Name = Name.substr(9); // strip off "xop.vpcom"
2161         if (Name.startswith("lt"))
2162           Imm = 0;
2163         else if (Name.startswith("le"))
2164           Imm = 1;
2165         else if (Name.startswith("gt"))
2166           Imm = 2;
2167         else if (Name.startswith("ge"))
2168           Imm = 3;
2169         else if (Name.startswith("eq"))
2170           Imm = 4;
2171         else if (Name.startswith("ne"))
2172           Imm = 5;
2173         else if (Name.startswith("false"))
2174           Imm = 6;
2175         else if (Name.startswith("true"))
2176           Imm = 7;
2177         else
2178           llvm_unreachable("Unknown condition");
2179       }
2180 
2181       Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2182     } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2183       Value *Sel = CI->getArgOperand(2);
2184       Value *NotSel = Builder.CreateNot(Sel);
2185       Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2186       Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2187       Rep = Builder.CreateOr(Sel0, Sel1);
2188     } else if (IsX86 && (Name.startswith("xop.vprot") ||
2189                          Name.startswith("avx512.prol") ||
2190                          Name.startswith("avx512.mask.prol"))) {
2191       Rep = upgradeX86Rotate(Builder, *CI, false);
2192     } else if (IsX86 && (Name.startswith("avx512.pror") ||
2193                          Name.startswith("avx512.mask.pror"))) {
2194       Rep = upgradeX86Rotate(Builder, *CI, true);
2195     } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2196                          Name.startswith("avx512.mask.vpshld") ||
2197                          Name.startswith("avx512.maskz.vpshld"))) {
2198       bool ZeroMask = Name[11] == 'z';
2199       Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2200     } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2201                          Name.startswith("avx512.mask.vpshrd") ||
2202                          Name.startswith("avx512.maskz.vpshrd"))) {
2203       bool ZeroMask = Name[11] == 'z';
2204       Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2205     } else if (IsX86 && Name == "sse42.crc32.64.8") {
2206       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2207                                                Intrinsic::x86_sse42_crc32_32_8);
2208       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2209       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2210       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2211     } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2212                          Name.startswith("avx512.vbroadcast.s"))) {
2213       // Replace broadcasts with a series of insertelements.
2214       Type *VecTy = CI->getType();
2215       Type *EltTy = VecTy->getVectorElementType();
2216       unsigned EltNum = VecTy->getVectorNumElements();
2217       Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2218                                           EltTy->getPointerTo());
2219       Value *Load = Builder.CreateLoad(EltTy, Cast);
2220       Type *I32Ty = Type::getInt32Ty(C);
2221       Rep = UndefValue::get(VecTy);
2222       for (unsigned I = 0; I < EltNum; ++I)
2223         Rep = Builder.CreateInsertElement(Rep, Load,
2224                                           ConstantInt::get(I32Ty, I));
2225     } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2226                          Name.startswith("sse41.pmovzx") ||
2227                          Name.startswith("avx2.pmovsx") ||
2228                          Name.startswith("avx2.pmovzx") ||
2229                          Name.startswith("avx512.mask.pmovsx") ||
2230                          Name.startswith("avx512.mask.pmovzx"))) {
2231       VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2232       VectorType *DstTy = cast<VectorType>(CI->getType());
2233       unsigned NumDstElts = DstTy->getNumElements();
2234 
2235       // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2236       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2237       for (unsigned i = 0; i != NumDstElts; ++i)
2238         ShuffleMask[i] = i;
2239 
2240       Value *SV = Builder.CreateShuffleVector(
2241           CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2242 
2243       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2244       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2245                    : Builder.CreateZExt(SV, DstTy);
2246       // If there are 3 arguments, it's a masked intrinsic so we need a select.
2247       if (CI->getNumArgOperands() == 3)
2248         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2249                             CI->getArgOperand(1));
2250     } else if (Name == "avx512.mask.pmov.qd.256" ||
2251                Name == "avx512.mask.pmov.qd.512" ||
2252                Name == "avx512.mask.pmov.wb.256" ||
2253                Name == "avx512.mask.pmov.wb.512") {
2254       Type *Ty = CI->getArgOperand(1)->getType();
2255       Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2256       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2257                           CI->getArgOperand(1));
2258     } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2259                          Name == "avx2.vbroadcasti128")) {
2260       // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2261       Type *EltTy = CI->getType()->getVectorElementType();
2262       unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2263       Type *VT = VectorType::get(EltTy, NumSrcElts);
2264       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2265                                             PointerType::getUnqual(VT));
2266       Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
2267       if (NumSrcElts == 2)
2268         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2269                                           { 0, 1, 0, 1 });
2270       else
2271         Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2272                                           { 0, 1, 2, 3, 0, 1, 2, 3 });
2273     } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2274                          Name.startswith("avx512.mask.shuf.f"))) {
2275       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2276       Type *VT = CI->getType();
2277       unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2278       unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2279       unsigned ControlBitsMask = NumLanes - 1;
2280       unsigned NumControlBits = NumLanes / 2;
2281       SmallVector<uint32_t, 8> ShuffleMask(0);
2282 
2283       for (unsigned l = 0; l != NumLanes; ++l) {
2284         unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2285         // We actually need the other source.
2286         if (l >= NumLanes / 2)
2287           LaneMask += NumLanes;
2288         for (unsigned i = 0; i != NumElementsInLane; ++i)
2289           ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2290       }
2291       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2292                                         CI->getArgOperand(1), ShuffleMask);
2293       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2294                           CI->getArgOperand(3));
2295     }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2296                          Name.startswith("avx512.mask.broadcasti"))) {
2297       unsigned NumSrcElts =
2298                         CI->getArgOperand(0)->getType()->getVectorNumElements();
2299       unsigned NumDstElts = CI->getType()->getVectorNumElements();
2300 
2301       SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2302       for (unsigned i = 0; i != NumDstElts; ++i)
2303         ShuffleMask[i] = i % NumSrcElts;
2304 
2305       Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2306                                         CI->getArgOperand(0),
2307                                         ShuffleMask);
2308       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2309                           CI->getArgOperand(1));
2310     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2311                          Name.startswith("avx2.vbroadcast") ||
2312                          Name.startswith("avx512.pbroadcast") ||
2313                          Name.startswith("avx512.mask.broadcast.s"))) {
2314       // Replace vp?broadcasts with a vector shuffle.
2315       Value *Op = CI->getArgOperand(0);
2316       unsigned NumElts = CI->getType()->getVectorNumElements();
2317       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2318       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2319                                         Constant::getNullValue(MaskTy));
2320 
2321       if (CI->getNumArgOperands() == 3)
2322         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2323                             CI->getArgOperand(1));
2324     } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2325                          Name.startswith("sse2.psubs.") ||
2326                          Name.startswith("avx2.padds.") ||
2327                          Name.startswith("avx2.psubs.") ||
2328                          Name.startswith("avx512.padds.") ||
2329                          Name.startswith("avx512.psubs.") ||
2330                          Name.startswith("avx512.mask.padds.") ||
2331                          Name.startswith("avx512.mask.psubs."))) {
2332       bool IsAdd = Name.contains(".padds");
2333       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2334     } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2335                          Name.startswith("sse2.psubus.") ||
2336                          Name.startswith("avx2.paddus.") ||
2337                          Name.startswith("avx2.psubus.") ||
2338                          Name.startswith("avx512.mask.paddus.") ||
2339                          Name.startswith("avx512.mask.psubus."))) {
2340       bool IsAdd = Name.contains(".paddus");
2341       Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2342     } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2343       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2344                                       CI->getArgOperand(1),
2345                                       CI->getArgOperand(2),
2346                                       CI->getArgOperand(3),
2347                                       CI->getArgOperand(4),
2348                                       false);
2349     } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2350       Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2351                                       CI->getArgOperand(1),
2352                                       CI->getArgOperand(2),
2353                                       CI->getArgOperand(3),
2354                                       CI->getArgOperand(4),
2355                                       true);
2356     } else if (IsX86 && (Name == "sse2.psll.dq" ||
2357                          Name == "avx2.psll.dq")) {
2358       // 128/256-bit shift left specified in bits.
2359       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2360       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2361                                        Shift / 8); // Shift is in bits.
2362     } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2363                          Name == "avx2.psrl.dq")) {
2364       // 128/256-bit shift right specified in bits.
2365       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2366       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2367                                        Shift / 8); // Shift is in bits.
2368     } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2369                          Name == "avx2.psll.dq.bs" ||
2370                          Name == "avx512.psll.dq.512")) {
2371       // 128/256/512-bit shift left specified in bytes.
2372       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2373       Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2374     } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2375                          Name == "avx2.psrl.dq.bs" ||
2376                          Name == "avx512.psrl.dq.512")) {
2377       // 128/256/512-bit shift right specified in bytes.
2378       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2379       Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2380     } else if (IsX86 && (Name == "sse41.pblendw" ||
2381                          Name.startswith("sse41.blendp") ||
2382                          Name.startswith("avx.blend.p") ||
2383                          Name == "avx2.pblendw" ||
2384                          Name.startswith("avx2.pblendd."))) {
2385       Value *Op0 = CI->getArgOperand(0);
2386       Value *Op1 = CI->getArgOperand(1);
2387       unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2388       VectorType *VecTy = cast<VectorType>(CI->getType());
2389       unsigned NumElts = VecTy->getNumElements();
2390 
2391       SmallVector<uint32_t, 16> Idxs(NumElts);
2392       for (unsigned i = 0; i != NumElts; ++i)
2393         Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2394 
2395       Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2396     } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2397                          Name == "avx2.vinserti128" ||
2398                          Name.startswith("avx512.mask.insert"))) {
2399       Value *Op0 = CI->getArgOperand(0);
2400       Value *Op1 = CI->getArgOperand(1);
2401       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2402       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2403       unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2404       unsigned Scale = DstNumElts / SrcNumElts;
2405 
2406       // Mask off the high bits of the immediate value; hardware ignores those.
2407       Imm = Imm % Scale;
2408 
2409       // Extend the second operand into a vector the size of the destination.
2410       Value *UndefV = UndefValue::get(Op1->getType());
2411       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2412       for (unsigned i = 0; i != SrcNumElts; ++i)
2413         Idxs[i] = i;
2414       for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2415         Idxs[i] = SrcNumElts;
2416       Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2417 
2418       // Insert the second operand into the first operand.
2419 
2420       // Note that there is no guarantee that instruction lowering will actually
2421       // produce a vinsertf128 instruction for the created shuffles. In
2422       // particular, the 0 immediate case involves no lane changes, so it can
2423       // be handled as a blend.
2424 
2425       // Example of shuffle mask for 32-bit elements:
2426       // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
2427       // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
2428 
2429       // First fill with identify mask.
2430       for (unsigned i = 0; i != DstNumElts; ++i)
2431         Idxs[i] = i;
2432       // Then replace the elements where we need to insert.
2433       for (unsigned i = 0; i != SrcNumElts; ++i)
2434         Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2435       Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2436 
2437       // If the intrinsic has a mask operand, handle that.
2438       if (CI->getNumArgOperands() == 5)
2439         Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2440                             CI->getArgOperand(3));
2441     } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2442                          Name == "avx2.vextracti128" ||
2443                          Name.startswith("avx512.mask.vextract"))) {
2444       Value *Op0 = CI->getArgOperand(0);
2445       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2446       unsigned DstNumElts = CI->getType()->getVectorNumElements();
2447       unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2448       unsigned Scale = SrcNumElts / DstNumElts;
2449 
2450       // Mask off the high bits of the immediate value; hardware ignores those.
2451       Imm = Imm % Scale;
2452 
2453       // Get indexes for the subvector of the input vector.
2454       SmallVector<uint32_t, 8> Idxs(DstNumElts);
2455       for (unsigned i = 0; i != DstNumElts; ++i) {
2456         Idxs[i] = i + (Imm * DstNumElts);
2457       }
2458       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2459 
2460       // If the intrinsic has a mask operand, handle that.
2461       if (CI->getNumArgOperands() == 4)
2462         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2463                             CI->getArgOperand(2));
2464     } else if (!IsX86 && Name == "stackprotectorcheck") {
2465       Rep = nullptr;
2466     } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2467                          Name.startswith("avx512.mask.perm.di."))) {
2468       Value *Op0 = CI->getArgOperand(0);
2469       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2470       VectorType *VecTy = cast<VectorType>(CI->getType());
2471       unsigned NumElts = VecTy->getNumElements();
2472 
2473       SmallVector<uint32_t, 8> Idxs(NumElts);
2474       for (unsigned i = 0; i != NumElts; ++i)
2475         Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2476 
2477       Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2478 
2479       if (CI->getNumArgOperands() == 4)
2480         Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2481                             CI->getArgOperand(2));
2482     } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2483                          Name == "avx2.vperm2i128")) {
2484       // The immediate permute control byte looks like this:
2485       //    [1:0] - select 128 bits from sources for low half of destination
2486       //    [2]   - ignore
2487       //    [3]   - zero low half of destination
2488       //    [5:4] - select 128 bits from sources for high half of destination
2489       //    [6]   - ignore
2490       //    [7]   - zero high half of destination
2491 
2492       uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2493 
2494       unsigned NumElts = CI->getType()->getVectorNumElements();
2495       unsigned HalfSize = NumElts / 2;
2496       SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2497 
2498       // Determine which operand(s) are actually in use for this instruction.
2499       Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2500       Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2501 
2502       // If needed, replace operands based on zero mask.
2503       V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2504       V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2505 
2506       // Permute low half of result.
2507       unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2508       for (unsigned i = 0; i < HalfSize; ++i)
2509         ShuffleMask[i] = StartIndex + i;
2510 
2511       // Permute high half of result.
2512       StartIndex = (Imm & 0x10) ? HalfSize : 0;
2513       for (unsigned i = 0; i < HalfSize; ++i)
2514         ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2515 
2516       Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2517 
2518     } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2519                          Name == "sse2.pshuf.d" ||
2520                          Name.startswith("avx512.mask.vpermil.p") ||
                         Name.startswith("avx512.mask.pshuf.d."))) {
      // Lower pshuf.d to a generic shufflevector driven by the 8-bit
      // immediate; the masked avx512 form is then blended with its passthru.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();
      // Calculate the size of each index in the immediate.
      unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
      unsigned IdxMask = ((1 << IdxSize) - 1);

      SmallVector<uint32_t, 8> Idxs(NumElts);
      // Lookup the bits for this element, wrapping around the immediate every
      // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
      // to offset by the first index of each group.
      for (unsigned i = 0; i != NumElts; ++i)
        Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // The masked avx512 variants carry two extra operands:
      // passthru (arg 2) and mask (arg 3).
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                         Name.startswith("avx512.mask.pshufl.w."))) {
      // pshufl.w: shuffle the low 4 words of each 128-bit lane by the
      // immediate; the high 4 words of each lane pass through unchanged.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
        for (unsigned i = 4; i != 8; ++i)
          Idxs[i + l] = i + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      // Masked avx512 variants: blend with passthru (arg 2) under mask (arg 3).
      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                         Name.startswith("avx512.mask.pshufh.w."))) {
      // pshufh.w: same idea as pshufl.w but shuffles the high 4 words of
      // each 128-bit lane; the low 4 words pass through unchanged.
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += 8) {
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l] = i + l;
        for (unsigned i = 0; i != 4; ++i)
          Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
      }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      if (CI->getNumArgOperands() == 4)
        Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                            CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
      // SHUFPS/SHUFPD: within each 128-bit lane, the low half of the result
      // selects from Op0 and the high half from Op1, per the immediate.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      unsigned NumElts = CI->getType()->getVectorNumElements();

      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
      unsigned HalfLaneElts = NumLaneElts / 2;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        // Base index is the starting element of the lane.
        Idxs[i] = i - (i % NumLaneElts);
        // If we are half way through the lane switch to the other source.
        if ((i % NumLaneElts) >= HalfLaneElts)
          Idxs[i] += NumElts;
        // Now select the specific element. By adding HalfLaneElts bits from
        // the immediate. Wrapping around the immediate every 8-bits.
        Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
      }

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
    } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                         Name.startswith("avx512.mask.movshdup") ||
                         Name.startswith("avx512.mask.movsldup"))) {
      // MOVDDUP/MOVSLDUP duplicate the even-indexed source element of each
      // pair; MOVSHDUP duplicates the odd one (selected via Offset).
      Value *Op0 = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      unsigned Offset = 0;
      if (Name.startswith("avx512.mask.movshdup."))
        Offset = 1;

      SmallVector<uint32_t, 16> Idxs(NumElts);
      for (unsigned l = 0; l != NumElts; l += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; i += 2) {
          // Both elements of the pair read the same source element.
          Idxs[i + l + 0] = i + l + Offset;
          Idxs[i + l + 1] = i + l + Offset;
        }

      Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                         Name.startswith("avx512.mask.unpckl."))) {
      // Unpack/interleave the low halves of each 128-bit lane of Op0 and Op1.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          // Alternate between Op0 (i even) and Op1 (i odd; +NumElts selects
          // the second shuffle operand).
          Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                         Name.startswith("avx512.mask.unpckh."))) {
      // Unpack/interleave the high halves of each 128-bit lane.
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      int NumElts = CI->getType()->getVectorNumElements();
      int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();

      SmallVector<uint32_t, 64> Idxs(NumElts);
      for (int l = 0; l != NumElts; l += NumLaneElts)
        for (int i = 0; i != NumLaneElts; ++i)
          // Same as punpckl but starting at the middle of each lane.
          Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);

      Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                         Name.startswith("avx512.mask.pand."))) {
      // Masked bitwise AND. FP vectors are bitcast to the same-width integer
      // vector so a plain 'and' can be emitted, then cast back.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                         Name.startswith("avx512.mask.pandn."))) {
      // Masked AND-NOT: (~Op0) & Op1, computed in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
      Rep = Builder.CreateAnd(Rep,
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                         Name.startswith("avx512.mask.por."))) {
      // Masked bitwise OR, computed in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                         Name.startswith("avx512.mask.pxor."))) {
      // Masked bitwise XOR, computed in the integer domain.
      VectorType *FTy = cast<VectorType>(CI->getType());
      VectorType *ITy = VectorType::getInteger(FTy);
      Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                              Builder.CreateBitCast(CI->getArgOperand(1), ITy));
      Rep = Builder.CreateBitCast(Rep, FTy);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
      // Masked integer add -> plain IR add + select.
      Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
      // Masked integer subtract -> plain IR sub + select.
      Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
      // Masked integer multiply (low half) -> plain IR mul + select.
      Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
      // Masked FP add. The 512-bit forms carry a rounding-mode operand
      // (arg 4), so they are routed to the new unmasked rounding intrinsic;
      // smaller sizes become a plain IR fadd. The mask select follows either
      // way.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        // Name[17] is the 's'/'d' of "ps"/"pd" in "avx512.mask.add.p?".
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_add_ps_512;
        else
          IID = Intrinsic::x86_avx512_add_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
      // Masked FP divide; same .512 rounding-operand handling as add.p above.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_div_ps_512;
        else
          IID = Intrinsic::x86_avx512_div_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
      // Masked FP multiply; same .512 rounding-operand handling as add.p.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_mul_ps_512;
        else
          IID = Intrinsic::x86_avx512_mul_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
      // Masked FP subtract; same .512 rounding-operand handling as add.p.
      if (Name.endswith(".512")) {
        Intrinsic::ID IID;
        if (Name[17] == 's')
          IID = Intrinsic::x86_avx512_sub_ps_512;
        else
          IID = Intrinsic::x86_avx512_sub_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(4) });
      } else {
        Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
      }
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                         Name.startswith("avx512.mask.min.p")) &&
               Name.drop_front(18) == ".512") {
      // Masked FP min/max, 512-bit forms only (the name must end ".512").
      // Name[13] distinguishes min ('i') from max; Name[17] is ps/pd.
      bool IsDouble = Name[17] == 'd';
      bool IsMin = Name[13] == 'i';
      static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
      };
      // Table is indexed [IsMin][IsDouble].
      Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
    } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
      // Masked leading-zero count -> generic ctlz with is_zero_undef=false,
      // then blend with the passthru under the mask.
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                         Intrinsic::ctlz,
                                                         CI->getType()),
                               { CI->getArgOperand(0), Builder.getInt1(false) });
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
    } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
      // Masked shift-left. The characters following "avx512.mask.psll"
      // (i.e. Name[16] onward) encode the form: 'i' => shift-by-immediate,
      // 'v' => variable per-element shift. The size letter picks the element
      // type and the ".128"/".256" suffix the vector width (no suffix means
      // the 512-bit intrinsics below).
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      // Scan past the form characters to find the element-size letter.
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        // Old variable-shift names encode the element count in the name.
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
          IID = Intrinsic::x86_avx2_psllv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
          IID = Intrinsic::x86_avx2_psllv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
          IID = Intrinsic::x86_avx2_psllv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
          IID = Intrinsic::x86_avx2_psllv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
          IID = Intrinsic::x86_avx512_psllv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
          IID = Intrinsic::x86_avx512_psllv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
          IID = Intrinsic::x86_avx512_psllv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                            : Intrinsic::x86_sse2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                            : Intrinsic::x86_sse2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                            : Intrinsic::x86_sse2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                            : Intrinsic::x86_avx2_psll_d;
        else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                            : Intrinsic::x86_avx2_psll_q;
        else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                            : Intrinsic::x86_avx2_psll_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_d_512 :
                              Intrinsic::x86_avx512_psll_d_512;
        else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psllv_q_512 :
                              Intrinsic::x86_avx512_psll_q_512;
        else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
          IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                            : Intrinsic::x86_avx512_psll_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
      // Masked logical shift-right; name decoding mirrors the psll case.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
          IID = Intrinsic::x86_avx2_psrlv_q;
        else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
          IID = Intrinsic::x86_avx2_psrlv_q_256;
        else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
          IID = Intrinsic::x86_avx2_psrlv_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
          IID = Intrinsic::x86_avx2_psrlv_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
          IID = Intrinsic::x86_avx512_psrlv_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
          IID = Intrinsic::x86_avx512_psrlv_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
          IID = Intrinsic::x86_avx512_psrlv_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                            : Intrinsic::x86_sse2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                            : Intrinsic::x86_sse2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                            : Intrinsic::x86_sse2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                            : Intrinsic::x86_avx2_psrl_d;
        else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                            : Intrinsic::x86_avx2_psrl_q;
        else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                            : Intrinsic::x86_avx2_psrl_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_d_512 :
                              Intrinsic::x86_avx512_psrl_d_512;
        else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrlv_q_512 :
                              Intrinsic::x86_avx512_psrl_q_512;
        else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                            : Intrinsic::x86_avx512_psrl_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
      // Masked arithmetic shift-right; name decoding mirrors the psll case.
      bool IsImmediate = Name[16] == 'i' ||
                         (Name.size() > 18 && Name[18] == 'i');
      bool IsVariable = Name[16] == 'v';
      char Size = Name[16] == '.' ? Name[17] :
                  Name[17] == '.' ? Name[18] :
                  Name[18] == '.' ? Name[19] :
                                    Name[20];

      Intrinsic::ID IID;
      if (IsVariable && Name[17] != '.') {
        if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
          IID = Intrinsic::x86_avx2_psrav_d;
        else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
          IID = Intrinsic::x86_avx2_psrav_d_256;
        else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
          IID = Intrinsic::x86_avx512_psrav_w_128;
        else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
          IID = Intrinsic::x86_avx512_psrav_w_256;
        else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
          IID = Intrinsic::x86_avx512_psrav_w_512;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".128")) {
        if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                            : Intrinsic::x86_sse2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_128 :
                              Intrinsic::x86_avx512_psra_q_128;
        else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
          IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                            : Intrinsic::x86_sse2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else if (Name.endswith(".256")) {
        if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                            : Intrinsic::x86_avx2_psra_d;
        else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_256 :
                              Intrinsic::x86_avx512_psra_q_256;
        else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
          IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                            : Intrinsic::x86_avx2_psra_w;
        else
          llvm_unreachable("Unexpected size");
      } else {
        if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_d_512 :
                              Intrinsic::x86_avx512_psra_d_512;
        else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
                IsVariable  ? Intrinsic::x86_avx512_psrav_q_512 :
                              Intrinsic::x86_avx512_psra_q_512;
        else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
          IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                            : Intrinsic::x86_avx512_psra_w_512;
        else
          llvm_unreachable("Unexpected size");
      }

      Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
    } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
      // Masked scalar move (movss/movsd) handled by a helper.
      Rep = upgradeMaskedMove(Builder, *CI);
    } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
      // Mask-register to integer-vector conversion handled by a helper.
      Rep = UpgradeMaskToInt(Builder, *CI);
    } else if (IsX86 && Name.endswith(".movntdqa")) {
      // Non-temporal load: replace with an ordinary aligned vector load
      // tagged with !nontemporal metadata.
      Module *M = F->getParent();
      MDNode *Node = MDNode::get(
          C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));

      Value *Ptr = CI->getArgOperand(0);
      VectorType *VTy = cast<VectorType>(CI->getType());

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC =
          Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
      LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
      LI->setMetadata(M->getMDKindID("nontemporal"), Node);
      Rep = LI;
    } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                         Name.startswith("fma.vfmsub.") ||
                         Name.startswith("fma.vfnmadd.") ||
                         Name.startswith("fma.vfnmsub."))) {
      // FMA3 intrinsics -> llvm.fma plus explicit fneg for the negated
      // forms. Decoded from the name: 'n' => negated product ("vfnm..."),
      // the following 's' => negated addend ("...sub"), and a trailing
      // scalar suffix => operate on element 0 only.
      bool NegMul = Name[6] == 'n';
      bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
      bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      // Scalar forms compute on element 0 only.
      if (IsScalar) {
        Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
        Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
        Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
      }

      // Negating either multiplicand is mathematically equivalent; the
      // packed form negates Ops[0], the scalar form Ops[1].
      if (NegMul && !IsScalar)
        Ops[0] = Builder.CreateFNeg(Ops[0]);
      if (NegMul && IsScalar)
        Ops[1] = Builder.CreateFNeg(Ops[1]);
      if (NegAcc)
        Ops[2] = Builder.CreateFNeg(Ops[2]);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      // Scalar result is inserted back into element 0 of the first source.
      if (IsScalar)
        Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                          (uint64_t)0);
    } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
      // FMA4 scalar madd: like the FMA3 scalar case above, but the upper
      // elements of the result are zeroed instead of coming from arg 0.
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);

      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                         Intrinsic::fma,
                                                         Ops[0]->getType()),
                               Ops);

      Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                         Name.startswith("avx512.maskz.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmadd.s") ||
                         Name.startswith("avx512.mask3.vfmsub.s") ||
                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
      // AVX-512 masked scalar FMA family. Name[11] (the character right
      // after "avx512.mask") distinguishes mask3/maskz from the plain form.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      // After dropping the prefix: Name[2] 'n' => negated product,
      // the following 's' => negated addend.
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2); // shadows the outer LLVMContext 'C'

      // Negate whichever multiplicand is NOT used as the masked passthru
      // below (A for the plain mask form); either choice is equivalent.
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);

      // A rounding operand (arg 4) other than 4 (_MM_FROUND_CUR_DIRECTION)
      // requires the rounding variant of the scalar FMA intrinsic.
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };

        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      // Passthru scalar: zero for maskz, the C element for mask3, the A
      // element otherwise.
      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;

      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);

      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
      // AVX-512 masked packed FMA family; name decoding is the same as the
      // scalar case above.
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';

      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);

      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);

      // A fifth (rounding) operand that is not 4 (_MM_FROUND_CUR_DIRECTION)
      // forces the 512-bit rounding intrinsic; otherwise plain llvm.fma.
      if (CI->getNumArgOperands() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;

        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }

      // Passthru: zero for maskz, arg 2 for mask3, arg 0 otherwise.
      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);

      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
                         Name.startswith("fma.vfmsubadd.p"))) {
      // FMADDSUB/FMSUBADD: compute both a*b+c and a*b-c, then interleave
      // them element-by-element with a shuffle.
      bool IsSubAdd = Name[7] == 's';
      int NumElts = CI->getType()->getVectorNumElements();

      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };

      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);

      if (IsSubAdd)
        std::swap(Even, Odd);

      // Even-indexed result lanes come from Even, odd-indexed from Odd.
      SmallVector<uint32_t, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;

      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3192                          Name.startswith("avx512.mask3.vfmaddsub.p") ||
3193                          Name.startswith("avx512.maskz.vfmaddsub.p") ||
3194                          Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3195       bool IsMask3 = Name[11] == '3';
3196       bool IsMaskZ = Name[11] == 'z';
3197       // Drop the "avx512.mask." to make it easier.
3198       Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3199       bool IsSubAdd = Name[3] == 's';
3200       if (CI->getNumArgOperands() == 5 &&
3201           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3202            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3203         Intrinsic::ID IID;
3204         // Check the character before ".512" in string.
3205         if (Name[Name.size()-5] == 's')
3206           IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3207         else
3208           IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3209 
3210         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3211                          CI->getArgOperand(2), CI->getArgOperand(4) };
3212         if (IsSubAdd)
3213           Ops[2] = Builder.CreateFNeg(Ops[2]);
3214 
3215         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3216                                  {CI->getArgOperand(0), CI->getArgOperand(1),
3217                                   CI->getArgOperand(2), CI->getArgOperand(4)});
3218       } else {
3219         int NumElts = CI->getType()->getVectorNumElements();
3220 
3221         Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3222                          CI->getArgOperand(2) };
3223 
3224         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3225                                                   Ops[0]->getType());
3226         Value *Odd = Builder.CreateCall(FMA, Ops);
3227         Ops[2] = Builder.CreateFNeg(Ops[2]);
3228         Value *Even = Builder.CreateCall(FMA, Ops);
3229 
3230         if (IsSubAdd)
3231           std::swap(Even, Odd);
3232 
3233         SmallVector<uint32_t, 32> Idxs(NumElts);
3234         for (int i = 0; i != NumElts; ++i)
3235           Idxs[i] = i + (i % 2) * NumElts;
3236 
3237         Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3238       }
3239 
3240       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3241                         IsMask3 ? CI->getArgOperand(2) :
3242                                   CI->getArgOperand(0);
3243 
3244       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3245     } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3246                          Name.startswith("avx512.maskz.pternlog."))) {
3247       bool ZeroMask = Name[11] == 'z';
3248       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3249       unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3250       Intrinsic::ID IID;
3251       if (VecWidth == 128 && EltWidth == 32)
3252         IID = Intrinsic::x86_avx512_pternlog_d_128;
3253       else if (VecWidth == 256 && EltWidth == 32)
3254         IID = Intrinsic::x86_avx512_pternlog_d_256;
3255       else if (VecWidth == 512 && EltWidth == 32)
3256         IID = Intrinsic::x86_avx512_pternlog_d_512;
3257       else if (VecWidth == 128 && EltWidth == 64)
3258         IID = Intrinsic::x86_avx512_pternlog_q_128;
3259       else if (VecWidth == 256 && EltWidth == 64)
3260         IID = Intrinsic::x86_avx512_pternlog_q_256;
3261       else if (VecWidth == 512 && EltWidth == 64)
3262         IID = Intrinsic::x86_avx512_pternlog_q_512;
3263       else
3264         llvm_unreachable("Unexpected intrinsic");
3265 
3266       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3267                         CI->getArgOperand(2), CI->getArgOperand(3) };
3268       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3269                                Args);
3270       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3271                                  : CI->getArgOperand(0);
3272       Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3273     } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3274                          Name.startswith("avx512.maskz.vpmadd52"))) {
3275       bool ZeroMask = Name[11] == 'z';
3276       bool High = Name[20] == 'h' || Name[21] == 'h';
3277       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3278       Intrinsic::ID IID;
3279       if (VecWidth == 128 && !High)
3280         IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3281       else if (VecWidth == 256 && !High)
3282         IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3283       else if (VecWidth == 512 && !High)
3284         IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3285       else if (VecWidth == 128 && High)
3286         IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3287       else if (VecWidth == 256 && High)
3288         IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3289       else if (VecWidth == 512 && High)
3290         IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3291       else
3292         llvm_unreachable("Unexpected intrinsic");
3293 
3294       Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3295                         CI->getArgOperand(2) };
3296       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3297                                Args);
3298       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3299                                  : CI->getArgOperand(0);
3300       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3301     } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3302                          Name.startswith("avx512.mask.vpermt2var.") ||
3303                          Name.startswith("avx512.maskz.vpermt2var."))) {
3304       bool ZeroMask = Name[11] == 'z';
3305       bool IndexForm = Name[17] == 'i';
3306       Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3307     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3308                          Name.startswith("avx512.maskz.vpdpbusd.") ||
3309                          Name.startswith("avx512.mask.vpdpbusds.") ||
3310                          Name.startswith("avx512.maskz.vpdpbusds."))) {
3311       bool ZeroMask = Name[11] == 'z';
3312       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3313       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3314       Intrinsic::ID IID;
3315       if (VecWidth == 128 && !IsSaturating)
3316         IID = Intrinsic::x86_avx512_vpdpbusd_128;
3317       else if (VecWidth == 256 && !IsSaturating)
3318         IID = Intrinsic::x86_avx512_vpdpbusd_256;
3319       else if (VecWidth == 512 && !IsSaturating)
3320         IID = Intrinsic::x86_avx512_vpdpbusd_512;
3321       else if (VecWidth == 128 && IsSaturating)
3322         IID = Intrinsic::x86_avx512_vpdpbusds_128;
3323       else if (VecWidth == 256 && IsSaturating)
3324         IID = Intrinsic::x86_avx512_vpdpbusds_256;
3325       else if (VecWidth == 512 && IsSaturating)
3326         IID = Intrinsic::x86_avx512_vpdpbusds_512;
3327       else
3328         llvm_unreachable("Unexpected intrinsic");
3329 
3330       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3331                         CI->getArgOperand(2)  };
3332       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3333                                Args);
3334       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3335                                  : CI->getArgOperand(0);
3336       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3337     } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3338                          Name.startswith("avx512.maskz.vpdpwssd.") ||
3339                          Name.startswith("avx512.mask.vpdpwssds.") ||
3340                          Name.startswith("avx512.maskz.vpdpwssds."))) {
3341       bool ZeroMask = Name[11] == 'z';
3342       bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3343       unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3344       Intrinsic::ID IID;
3345       if (VecWidth == 128 && !IsSaturating)
3346         IID = Intrinsic::x86_avx512_vpdpwssd_128;
3347       else if (VecWidth == 256 && !IsSaturating)
3348         IID = Intrinsic::x86_avx512_vpdpwssd_256;
3349       else if (VecWidth == 512 && !IsSaturating)
3350         IID = Intrinsic::x86_avx512_vpdpwssd_512;
3351       else if (VecWidth == 128 && IsSaturating)
3352         IID = Intrinsic::x86_avx512_vpdpwssds_128;
3353       else if (VecWidth == 256 && IsSaturating)
3354         IID = Intrinsic::x86_avx512_vpdpwssds_256;
3355       else if (VecWidth == 512 && IsSaturating)
3356         IID = Intrinsic::x86_avx512_vpdpwssds_512;
3357       else
3358         llvm_unreachable("Unexpected intrinsic");
3359 
3360       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3361                         CI->getArgOperand(2)  };
3362       Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3363                                Args);
3364       Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3365                                  : CI->getArgOperand(0);
3366       Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3367     } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3368                          Name == "addcarry.u32" || Name == "addcarry.u64" ||
3369                          Name == "subborrow.u32" || Name == "subborrow.u64")) {
3370       Intrinsic::ID IID;
3371       if (Name[0] == 'a' && Name.back() == '2')
3372         IID = Intrinsic::x86_addcarry_32;
3373       else if (Name[0] == 'a' && Name.back() == '4')
3374         IID = Intrinsic::x86_addcarry_64;
3375       else if (Name[0] == 's' && Name.back() == '2')
3376         IID = Intrinsic::x86_subborrow_32;
3377       else if (Name[0] == 's' && Name.back() == '4')
3378         IID = Intrinsic::x86_subborrow_64;
3379       else
3380         llvm_unreachable("Unexpected intrinsic");
3381 
3382       // Make a call with 3 operands.
3383       Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3384                         CI->getArgOperand(2)};
3385       Value *NewCall = Builder.CreateCall(
3386                                 Intrinsic::getDeclaration(CI->getModule(), IID),
3387                                 Args);
3388 
3389       // Extract the second result and store it.
3390       Value *Data = Builder.CreateExtractValue(NewCall, 1);
3391       // Cast the pointer to the right type.
3392       Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3393                                  llvm::PointerType::getUnqual(Data->getType()));
3394       Builder.CreateAlignedStore(Data, Ptr, 1);
3395       // Replace the original call result with the first result of the new call.
3396       Value *CF = Builder.CreateExtractValue(NewCall, 0);
3397 
3398       CI->replaceAllUsesWith(CF);
3399       Rep = nullptr;
3400     } else if (IsX86 && Name.startswith("avx512.mask.") &&
3401                upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3402       // Rep will be updated by the call in the condition.
3403     } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3404       Value *Arg = CI->getArgOperand(0);
3405       Value *Neg = Builder.CreateNeg(Arg, "neg");
3406       Value *Cmp = Builder.CreateICmpSGE(
3407           Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3408       Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3409     } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3410                           Name == "max.ui" || Name == "max.ull")) {
3411       Value *Arg0 = CI->getArgOperand(0);
3412       Value *Arg1 = CI->getArgOperand(1);
3413       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3414                        ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3415                        : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3416       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3417     } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3418                           Name == "min.ui" || Name == "min.ull")) {
3419       Value *Arg0 = CI->getArgOperand(0);
3420       Value *Arg1 = CI->getArgOperand(1);
3421       Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3422                        ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3423                        : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3424       Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3425     } else if (IsNVVM && Name == "clz.ll") {
3426       // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3427       Value *Arg = CI->getArgOperand(0);
3428       Value *Ctlz = Builder.CreateCall(
3429           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
3430                                     {Arg->getType()}),
3431           {Arg, Builder.getFalse()}, "ctlz");
3432       Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3433     } else if (IsNVVM && Name == "popc.ll") {
3434       // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3435       // i64.
3436       Value *Arg = CI->getArgOperand(0);
3437       Value *Popc = Builder.CreateCall(
3438           Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
3439                                     {Arg->getType()}),
3440           Arg, "ctpop");
3441       Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3442     } else if (IsNVVM && Name == "h2f") {
3443       Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3444                                    F->getParent(), Intrinsic::convert_from_fp16,
3445                                    {Builder.getFloatTy()}),
3446                                CI->getArgOperand(0), "h2f");
3447     } else {
3448       llvm_unreachable("Unknown function for CallInst upgrade.");
3449     }
3450 
3451     if (Rep)
3452       CI->replaceAllUsesWith(Rep);
3453     CI->eraseFromParent();
3454     return;
3455   }
3456 
3457   const auto &DefaultCase = [&NewFn, &CI]() -> void {
3458     // Handle generic mangling change, but nothing else
3459     assert(
3460         (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3461         "Unknown function for CallInst upgrade and isn't just a name change");
3462     CI->setCalledFunction(NewFn);
3463   };
3464   CallInst *NewCall = nullptr;
3465   switch (NewFn->getIntrinsicID()) {
3466   default: {
3467     DefaultCase();
3468     return;
3469   }
3470 
3471   case Intrinsic::arm_neon_vld1:
3472   case Intrinsic::arm_neon_vld2:
3473   case Intrinsic::arm_neon_vld3:
3474   case Intrinsic::arm_neon_vld4:
3475   case Intrinsic::arm_neon_vld2lane:
3476   case Intrinsic::arm_neon_vld3lane:
3477   case Intrinsic::arm_neon_vld4lane:
3478   case Intrinsic::arm_neon_vst1:
3479   case Intrinsic::arm_neon_vst2:
3480   case Intrinsic::arm_neon_vst3:
3481   case Intrinsic::arm_neon_vst4:
3482   case Intrinsic::arm_neon_vst2lane:
3483   case Intrinsic::arm_neon_vst3lane:
3484   case Intrinsic::arm_neon_vst4lane: {
3485     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3486                                  CI->arg_operands().end());
3487     NewCall = Builder.CreateCall(NewFn, Args);
3488     break;
3489   }
3490 
3491   case Intrinsic::bitreverse:
3492     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3493     break;
3494 
3495   case Intrinsic::ctlz:
3496   case Intrinsic::cttz:
3497     assert(CI->getNumArgOperands() == 1 &&
3498            "Mismatch between function args and call args");
3499     NewCall =
3500         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3501     break;
3502 
3503   case Intrinsic::objectsize: {
3504     Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3505                                    ? Builder.getFalse()
3506                                    : CI->getArgOperand(2);
3507     Value *Dynamic =
3508         CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
3509     NewCall = Builder.CreateCall(
3510         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
3511     break;
3512   }
3513 
3514   case Intrinsic::ctpop:
3515     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3516     break;
3517 
3518   case Intrinsic::convert_from_fp16:
3519     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3520     break;
3521 
3522   case Intrinsic::dbg_value:
3523     // Upgrade from the old version that had an extra offset argument.
3524     assert(CI->getNumArgOperands() == 4);
3525     // Drop nonzero offsets instead of attempting to upgrade them.
3526     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3527       if (Offset->isZeroValue()) {
3528         NewCall = Builder.CreateCall(
3529             NewFn,
3530             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3531         break;
3532       }
3533     CI->eraseFromParent();
3534     return;
3535 
3536   case Intrinsic::x86_xop_vfrcz_ss:
3537   case Intrinsic::x86_xop_vfrcz_sd:
3538     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3539     break;
3540 
3541   case Intrinsic::x86_xop_vpermil2pd:
3542   case Intrinsic::x86_xop_vpermil2ps:
3543   case Intrinsic::x86_xop_vpermil2pd_256:
3544   case Intrinsic::x86_xop_vpermil2ps_256: {
3545     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3546                                  CI->arg_operands().end());
3547     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3548     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3549     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3550     NewCall = Builder.CreateCall(NewFn, Args);
3551     break;
3552   }
3553 
3554   case Intrinsic::x86_sse41_ptestc:
3555   case Intrinsic::x86_sse41_ptestz:
3556   case Intrinsic::x86_sse41_ptestnzc: {
3557     // The arguments for these intrinsics used to be v4f32, and changed
3558     // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3559     // So, the only thing required is a bitcast for both arguments.
3560     // First, check the arguments have the old type.
3561     Value *Arg0 = CI->getArgOperand(0);
3562     if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3563       return;
3564 
3565     // Old intrinsic, add bitcasts
3566     Value *Arg1 = CI->getArgOperand(1);
3567 
3568     Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3569 
3570     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3571     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3572 
3573     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3574     break;
3575   }
3576 
3577   case Intrinsic::x86_rdtscp: {
3578     // This used to take 1 arguments. If we have no arguments, it is already
3579     // upgraded.
3580     if (CI->getNumOperands() == 0)
3581       return;
3582 
3583     NewCall = Builder.CreateCall(NewFn);
3584     // Extract the second result and store it.
3585     Value *Data = Builder.CreateExtractValue(NewCall, 1);
3586     // Cast the pointer to the right type.
3587     Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3588                                  llvm::PointerType::getUnqual(Data->getType()));
3589     Builder.CreateAlignedStore(Data, Ptr, 1);
3590     // Replace the original call result with the first result of the new call.
3591     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3592 
3593     std::string Name = CI->getName();
3594     if (!Name.empty()) {
3595       CI->setName(Name + ".old");
3596       NewCall->setName(Name);
3597     }
3598     CI->replaceAllUsesWith(TSC);
3599     CI->eraseFromParent();
3600     return;
3601   }
3602 
3603   case Intrinsic::x86_sse41_insertps:
3604   case Intrinsic::x86_sse41_dppd:
3605   case Intrinsic::x86_sse41_dpps:
3606   case Intrinsic::x86_sse41_mpsadbw:
3607   case Intrinsic::x86_avx_dp_ps_256:
3608   case Intrinsic::x86_avx2_mpsadbw: {
3609     // Need to truncate the last argument from i32 to i8 -- this argument models
3610     // an inherently 8-bit immediate operand to these x86 instructions.
3611     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3612                                  CI->arg_operands().end());
3613 
3614     // Replace the last argument with a trunc.
3615     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3616     NewCall = Builder.CreateCall(NewFn, Args);
3617     break;
3618   }
3619 
3620   case Intrinsic::thread_pointer: {
3621     NewCall = Builder.CreateCall(NewFn, {});
3622     break;
3623   }
3624 
3625   case Intrinsic::invariant_start:
3626   case Intrinsic::invariant_end:
3627   case Intrinsic::masked_load:
3628   case Intrinsic::masked_store:
3629   case Intrinsic::masked_gather:
3630   case Intrinsic::masked_scatter: {
3631     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
3632                                  CI->arg_operands().end());
3633     NewCall = Builder.CreateCall(NewFn, Args);
3634     break;
3635   }
3636 
3637   case Intrinsic::memcpy:
3638   case Intrinsic::memmove:
3639   case Intrinsic::memset: {
3640     // We have to make sure that the call signature is what we're expecting.
3641     // We only want to change the old signatures by removing the alignment arg:
3642     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3643     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3644     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3645     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
3646     // Note: i8*'s in the above can be any pointer type
3647     if (CI->getNumArgOperands() != 5) {
3648       DefaultCase();
3649       return;
3650     }
3651     // Remove alignment argument (3), and add alignment attributes to the
3652     // dest/src pointers.
3653     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3654                       CI->getArgOperand(2), CI->getArgOperand(4)};
3655     NewCall = Builder.CreateCall(NewFn, Args);
3656     auto *MemCI = cast<MemIntrinsic>(NewCall);
3657     // All mem intrinsics support dest alignment.
3658     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3659     MemCI->setDestAlignment(Align->getZExtValue());
3660     // Memcpy/Memmove also support source alignment.
3661     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3662       MTI->setSourceAlignment(Align->getZExtValue());
3663     break;
3664   }
3665   }
3666   assert(NewCall && "Should have either set this variable or returned through "
3667                     "the default case");
3668   std::string Name = CI->getName();
3669   if (!Name.empty()) {
3670     CI->setName(Name + ".old");
3671     NewCall->setName(Name);
3672   }
3673   CI->replaceAllUsesWith(NewCall);
3674   CI->eraseFromParent();
3675 }
3676 
3677 void llvm::UpgradeCallsToIntrinsic(Function *F) {
3678   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3679 
3680   // Check if this function should be upgraded and get the replacement function
3681   // if there is one.
3682   Function *NewFn;
3683   if (UpgradeIntrinsicFunction(F, NewFn)) {
3684     // Replace all users of the old function with the new function or new
3685     // instructions. This is not a range loop because the call is deleted.
3686     for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3687       if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3688         UpgradeIntrinsicCall(CI, NewFn);
3689 
3690     // Remove old function, no longer used, from the module.
3691     F->eraseFromParent();
3692   }
3693 }
3694 
3695 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
3696   // Check if the tag uses struct-path aware TBAA format.
3697   if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3698     return &MD;
3699 
3700   auto &Context = MD.getContext();
3701   if (MD.getNumOperands() == 3) {
3702     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3703     MDNode *ScalarType = MDNode::get(Context, Elts);
3704     // Create a MDNode <ScalarType, ScalarType, offset 0, const>
3705     Metadata *Elts2[] = {ScalarType, ScalarType,
3706                          ConstantAsMetadata::get(
3707                              Constant::getNullValue(Type::getInt64Ty(Context))),
3708                          MD.getOperand(2)};
3709     return MDNode::get(Context, Elts2);
3710   }
3711   // Create a MDNode <MD, MD, offset 0>
3712   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
3713                                     Type::getInt64Ty(Context)))};
3714   return MDNode::get(Context, Elts);
3715 }
3716 
3717 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3718                                       Instruction *&Temp) {
3719   if (Opc != Instruction::BitCast)
3720     return nullptr;
3721 
3722   Temp = nullptr;
3723   Type *SrcTy = V->getType();
3724   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3725       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3726     LLVMContext &Context = V->getContext();
3727 
3728     // We have no information about target data layout, so we assume that
3729     // the maximum pointer size is 64bit.
3730     Type *MidTy = Type::getInt64Ty(Context);
3731     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3732 
3733     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3734   }
3735 
3736   return nullptr;
3737 }
3738 
3739 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3740   if (Opc != Instruction::BitCast)
3741     return nullptr;
3742 
3743   Type *SrcTy = C->getType();
3744   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3745       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3746     LLVMContext &Context = C->getContext();
3747 
3748     // We have no information about target data layout, so we assume that
3749     // the maximum pointer size is 64bit.
3750     Type *MidTy = Type::getInt64Ty(Context);
3751 
3752     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
3753                                      DestTy);
3754   }
3755 
3756   return nullptr;
3757 }
3758 
3759 /// Check the debug info version number, if it is out-dated, drop the debug
3760 /// info. Return true if module is modified.
3761 bool llvm::UpgradeDebugInfo(Module &M) {
3762   unsigned Version = getDebugMetadataVersionFromModule(M);
3763   if (Version == DEBUG_METADATA_VERSION) {
3764     bool BrokenDebugInfo = false;
3765     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3766       report_fatal_error("Broken module found, compilation aborted!");
3767     if (!BrokenDebugInfo)
3768       // Everything is ok.
3769       return false;
3770     else {
3771       // Diagnose malformed debug info.
3772       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
3773       M.getContext().diagnose(Diag);
3774     }
3775   }
3776   bool Modified = StripDebugInfo(M);
3777   if (Modified && Version != DEBUG_METADATA_VERSION) {
3778     // Diagnose a version mismatch.
3779     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3780     M.getContext().diagnose(DiagVersion);
3781   }
3782   return Modified;
3783 }
3784 
3785 bool llvm::UpgradeRetainReleaseMarker(Module &M) {
3786   bool Changed = false;
3787   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3788   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3789   if (ModRetainReleaseMarker) {
3790     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3791     if (Op) {
3792       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3793       if (ID) {
3794         SmallVector<StringRef, 4> ValueComp;
3795         ID->getString().split(ValueComp, "#");
3796         if (ValueComp.size() == 2) {
3797           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3798           ID = MDString::get(M.getContext(), NewValue);
3799         }
3800         M.addModuleFlag(Module::Error, MarkerKey, ID);
3801         M.eraseNamedMetadata(ModRetainReleaseMarker);
3802         Changed = true;
3803       }
3804     }
3805   }
3806   return Changed;
3807 }
3808 
// Upgrade the contents of the "llvm.module.flags" named metadata to current
// conventions:
//  * "PIC Level"/"PIE Level" flags switch behavior from Error to Max.
//  * The "Objective-C Image Info Section" value loses internal whitespace.
//  * A default "Objective-C Class Properties" flag is added for ObjC modules
//    that predate it.
// Returns true if any flag was modified or added.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    // Well-formed module flags are (behavior, id, value) triples; skip
    // anything else.
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    // Record whether the ObjC image-info and class-properties flags are
    // present; used after the loop to decide whether to synthesize the
    // latter.
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // field was Error and now they are Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          // Rebuild the triple with Max behavior; id and value are reused
          // unchanged.
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade Objective-C Image Info Section. Removed the whitespce in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        // More than one component means the string contained spaces;
        // reassemble it without them.
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correclty downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  return Changed;
}
3875 
3876 void llvm::UpgradeSectionAttributes(Module &M) {
3877   auto TrimSpaces = [](StringRef Section) -> std::string {
3878     SmallVector<StringRef, 5> Components;
3879     Section.split(Components, ',');
3880 
3881     SmallString<32> Buffer;
3882     raw_svector_ostream OS(Buffer);
3883 
3884     for (auto Component : Components)
3885       OS << ',' << Component.trim();
3886 
3887     return OS.str().substr(1);
3888   };
3889 
3890   for (auto &GV : M.globals()) {
3891     if (!GV.hasSection())
3892       continue;
3893 
3894     StringRef Section = GV.getSection();
3895 
3896     if (!Section.startswith("__DATA, __objc_catlist"))
3897       continue;
3898 
3899     // __DATA, __objc_catlist, regular, no_dead_strip
3900     // __DATA,__objc_catlist,regular,no_dead_strip
3901     GV.setSection(TrimSpaces(Section));
3902   }
3903 }
3904 
3905 static bool isOldLoopArgument(Metadata *MD) {
3906   auto *T = dyn_cast_or_null<MDTuple>(MD);
3907   if (!T)
3908     return false;
3909   if (T->getNumOperands() < 1)
3910     return false;
3911   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3912   if (!S)
3913     return false;
3914   return S->getString().startswith("llvm.vectorizer.");
3915 }
3916 
3917 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
3918   StringRef OldPrefix = "llvm.vectorizer.";
3919   assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3920 
3921   if (OldTag == "llvm.vectorizer.unroll")
3922     return MDString::get(C, "llvm.loop.interleave.count");
3923 
3924   return MDString::get(
3925       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3926              .str());
3927 }
3928 
3929 static Metadata *upgradeLoopArgument(Metadata *MD) {
3930   auto *T = dyn_cast_or_null<MDTuple>(MD);
3931   if (!T)
3932     return MD;
3933   if (T->getNumOperands() < 1)
3934     return MD;
3935   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3936   if (!OldTag)
3937     return MD;
3938   if (!OldTag->getString().startswith("llvm.vectorizer."))
3939     return MD;
3940 
3941   // This has an old tag.  Upgrade it.
3942   SmallVector<Metadata *, 8> Ops;
3943   Ops.reserve(T->getNumOperands());
3944   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
3945   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3946     Ops.push_back(T->getOperand(I));
3947 
3948   return MDTuple::get(T->getContext(), Ops);
3949 }
3950 
3951 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
3952   auto *T = dyn_cast<MDTuple>(&N);
3953   if (!T)
3954     return &N;
3955 
3956   if (none_of(T->operands(), isOldLoopArgument))
3957     return &N;
3958 
3959   SmallVector<Metadata *, 8> Ops;
3960   Ops.reserve(T->getNumOperands());
3961   for (Metadata *MD : T->operands())
3962     Ops.push_back(upgradeLoopArgument(MD));
3963 
3964   return MDTuple::get(T->getContext(), Ops);
3965 }
3966