1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include <sstream>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 using namespace llvm;
34 
35 /// getBuiltinLibFunction - Given a builtin id for a function like
36 /// "__builtin_fabsf", return a Function* for "fabsf".
37 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
38                                                   unsigned BuiltinID) {
39   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
40 
41   // Get the name, skip over the __builtin_ prefix (if necessary).
42   StringRef Name;
43   GlobalDecl D(FD);
44 
45   // If the builtin has been declared explicitly with an assembler label,
46   // use the mangled name. This differs from the plain label on platforms
47   // that prefix labels.
48   if (FD->hasAttr<AsmLabelAttr>())
49     Name = getMangledName(D);
50   else
51     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
52 
53   llvm::FunctionType *Ty =
54     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
55 
56   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
57 }
58 
59 /// Emit the conversions required to turn the given value into an
60 /// integer of the given size.
61 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
62                         QualType T, llvm::IntegerType *IntType) {
63   V = CGF.EmitToMemory(V, T);
64 
65   if (V->getType()->isPointerTy())
66     return CGF.Builder.CreatePtrToInt(V, IntType);
67 
68   assert(V->getType() == IntType);
69   return V;
70 }
71 
72 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
73                           QualType T, llvm::Type *ResultType) {
74   V = CGF.EmitFromMemory(V, T);
75 
76   if (ResultType->isPointerTy())
77     return CGF.Builder.CreateIntToPtr(V, ResultType);
78 
79   assert(V->getType() == ResultType);
80   return V;
81 }
82 
83 /// Utility to insert an atomic instruction based on Instrinsic::ID
84 /// and the expression node.
85 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
86                                     llvm::AtomicRMWInst::BinOp Kind,
87                                     const CallExpr *E) {
88   QualType T = E->getType();
89   assert(E->getArg(0)->getType()->isPointerType());
90   assert(CGF.getContext().hasSameUnqualifiedType(T,
91                                   E->getArg(0)->getType()->getPointeeType()));
92   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
93 
94   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
95   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
96 
97   llvm::IntegerType *IntType =
98     llvm::IntegerType::get(CGF.getLLVMContext(),
99                            CGF.getContext().getTypeSize(T));
100   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
101 
102   llvm::Value *Args[2];
103   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
104   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
105   llvm::Type *ValueType = Args[1]->getType();
106   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
107 
108   llvm::Value *Result =
109       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
110                                   llvm::SequentiallyConsistent);
111   return EmitFromInt(CGF, Result, T, ValueType);
112 }
113 
114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118   // Convert the type of the pointer to a pointer to the stored type.
119   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120   Value *BC = CGF.Builder.CreateBitCast(
121       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123   LV.setNontemporal(true);
124   CGF.EmitStoreOfScalar(Val, LV, false);
125   return nullptr;
126 }
127 
128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132   LV.setNontemporal(true);
133   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137                                llvm::AtomicRMWInst::BinOp Kind,
138                                const CallExpr *E) {
139   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based Instrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146                                    llvm::AtomicRMWInst::BinOp Kind,
147                                    const CallExpr *E,
148                                    Instruction::BinaryOps Op,
149                                    bool Invert = false) {
150   QualType T = E->getType();
151   assert(E->getArg(0)->getType()->isPointerType());
152   assert(CGF.getContext().hasSameUnqualifiedType(T,
153                                   E->getArg(0)->getType()->getPointeeType()));
154   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159   llvm::IntegerType *IntType =
160     llvm::IntegerType::get(CGF.getLLVMContext(),
161                            CGF.getContext().getTypeSize(T));
162   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164   llvm::Value *Args[2];
165   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166   llvm::Type *ValueType = Args[1]->getType();
167   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170   llvm::Value *Result =
171       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
172                                   llvm::SequentiallyConsistent);
173   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
174   if (Invert)
175     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
176                                      llvm::ConstantInt::get(IntType, -1));
177   Result = EmitFromInt(CGF, Result, T, ValueType);
178   return RValue::get(Result);
179 }
180 
181 /// @brief Utility to insert an atomic cmpxchg instruction.
182 ///
183 /// @param CGF The current codegen function.
184 /// @param E   Builtin call expression to convert to cmpxchg.
185 ///            arg0 - address to operate on
186 ///            arg1 - value to compare with
187 ///            arg2 - new value
188 /// @param ReturnBool Specifies whether to return success flag of
189 ///                   cmpxchg result or the old value.
190 ///
191 /// @returns result of cmpxchg, according to ReturnBool
192 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
193                                      bool ReturnBool) {
194   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
195   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
196   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
197 
198   llvm::IntegerType *IntType = llvm::IntegerType::get(
199       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
200   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
201 
202   Value *Args[3];
203   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
204   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
205   llvm::Type *ValueType = Args[1]->getType();
206   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
207   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
208 
209   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
210                                                 llvm::SequentiallyConsistent,
211                                                 llvm::SequentiallyConsistent);
212   if (ReturnBool)
213     // Extract boolean success flag and zext it to int.
214     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
215                                   CGF.ConvertType(E->getType()));
216   else
217     // Extract old value and emit it using the same type as compare value.
218     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
219                        ValueType);
220 }
221 
222 /// EmitFAbs - Emit a call to @llvm.fabs().
223 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
224   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
225   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
226   Call->setDoesNotAccessMemory();
227   return Call;
228 }
229 
230 /// Emit the computation of the sign bit for a floating point value. Returns
231 /// the i1 sign bit value.
232 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
233   LLVMContext &C = CGF.CGM.getLLVMContext();
234 
235   llvm::Type *Ty = V->getType();
236   int Width = Ty->getPrimitiveSizeInBits();
237   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
238   V = CGF.Builder.CreateBitCast(V, IntTy);
239   if (Ty->isPPC_FP128Ty()) {
240     // The higher-order double comes first, and so we need to truncate the
241     // pair to extract the overall sign. The order of the pair is the same
242     // in both little- and big-Endian modes.
243     Width >>= 1;
244     IntTy = llvm::IntegerType::get(C, Width);
245     V = CGF.Builder.CreateTrunc(V, IntTy);
246   }
247   Value *Zero = llvm::Constant::getNullValue(IntTy);
248   return CGF.Builder.CreateICmpSLT(V, Zero);
249 }
250 
251 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
252                               const CallExpr *E, llvm::Value *calleeValue) {
253   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
254                       ReturnValueSlot(), Fn);
255 }
256 
257 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
258 /// depending on IntrinsicID.
259 ///
260 /// \arg CGF The current codegen function.
261 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
262 /// \arg X The first argument to the llvm.*.with.overflow.*.
263 /// \arg Y The second argument to the llvm.*.with.overflow.*.
264 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
265 /// \returns The result (i.e. sum/product) returned by the intrinsic.
266 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
267                                           const llvm::Intrinsic::ID IntrinsicID,
268                                           llvm::Value *X, llvm::Value *Y,
269                                           llvm::Value *&Carry) {
270   // Make sure we have integers of the same width.
271   assert(X->getType() == Y->getType() &&
272          "Arguments must be the same type. (Did you forget to make sure both "
273          "arguments have the same integer width?)");
274 
275   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
276   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
277   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
278   return CGF.Builder.CreateExtractValue(Tmp, 0);
279 }
280 
281 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
282   llvm::Type *DestType = Int8PtrTy;
283   if (ArgValue->getType() != DestType)
284     ArgValue =
285         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
286 
287   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
288   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
289 }
290 
291 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
292                                         unsigned BuiltinID, const CallExpr *E,
293                                         ReturnValueSlot ReturnValue) {
294   // See if we can constant fold this builtin.  If so, don't emit it at all.
295   Expr::EvalResult Result;
296   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
297       !Result.hasSideEffects()) {
298     if (Result.Val.isInt())
299       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
300                                                 Result.Val.getInt()));
301     if (Result.Val.isFloat())
302       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
303                                                Result.Val.getFloat()));
304   }
305 
306   switch (BuiltinID) {
307   default: break;  // Handle intrinsics and libm functions below.
308   case Builtin::BI__builtin___CFStringMakeConstantString:
309   case Builtin::BI__builtin___NSStringMakeConstantString:
310     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
311   case Builtin::BI__builtin_stdarg_start:
312   case Builtin::BI__builtin_va_start:
313   case Builtin::BI__va_start:
314   case Builtin::BI__builtin_va_end:
315     return RValue::get(
316         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
317                            ? EmitScalarExpr(E->getArg(0))
318                            : EmitVAListRef(E->getArg(0)).getPointer(),
319                        BuiltinID != Builtin::BI__builtin_va_end));
320   case Builtin::BI__builtin_va_copy: {
321     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
322     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
323 
324     llvm::Type *Type = Int8PtrTy;
325 
326     DstPtr = Builder.CreateBitCast(DstPtr, Type);
327     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
328     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
329                                           {DstPtr, SrcPtr}));
330   }
331   case Builtin::BI__builtin_abs:
332   case Builtin::BI__builtin_labs:
333   case Builtin::BI__builtin_llabs: {
334     Value *ArgValue = EmitScalarExpr(E->getArg(0));
335 
336     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
337     Value *CmpResult =
338     Builder.CreateICmpSGE(ArgValue,
339                           llvm::Constant::getNullValue(ArgValue->getType()),
340                                                             "abscond");
341     Value *Result =
342       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
343 
344     return RValue::get(Result);
345   }
346   case Builtin::BI__builtin_fabs:
347   case Builtin::BI__builtin_fabsf:
348   case Builtin::BI__builtin_fabsl: {
349     Value *Arg1 = EmitScalarExpr(E->getArg(0));
350     Value *Result = EmitFAbs(*this, Arg1);
351     return RValue::get(Result);
352   }
353   case Builtin::BI__builtin_fmod:
354   case Builtin::BI__builtin_fmodf:
355   case Builtin::BI__builtin_fmodl: {
356     Value *Arg1 = EmitScalarExpr(E->getArg(0));
357     Value *Arg2 = EmitScalarExpr(E->getArg(1));
358     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
359     return RValue::get(Result);
360   }
361 
362   case Builtin::BI__builtin_conj:
363   case Builtin::BI__builtin_conjf:
364   case Builtin::BI__builtin_conjl: {
365     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
366     Value *Real = ComplexVal.first;
367     Value *Imag = ComplexVal.second;
368     Value *Zero =
369       Imag->getType()->isFPOrFPVectorTy()
370         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
371         : llvm::Constant::getNullValue(Imag->getType());
372 
373     Imag = Builder.CreateFSub(Zero, Imag, "sub");
374     return RValue::getComplex(std::make_pair(Real, Imag));
375   }
376   case Builtin::BI__builtin_creal:
377   case Builtin::BI__builtin_crealf:
378   case Builtin::BI__builtin_creall:
379   case Builtin::BIcreal:
380   case Builtin::BIcrealf:
381   case Builtin::BIcreall: {
382     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
383     return RValue::get(ComplexVal.first);
384   }
385 
386   case Builtin::BI__builtin_cimag:
387   case Builtin::BI__builtin_cimagf:
388   case Builtin::BI__builtin_cimagl:
389   case Builtin::BIcimag:
390   case Builtin::BIcimagf:
391   case Builtin::BIcimagl: {
392     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
393     return RValue::get(ComplexVal.second);
394   }
395 
396   case Builtin::BI__builtin_ctzs:
397   case Builtin::BI__builtin_ctz:
398   case Builtin::BI__builtin_ctzl:
399   case Builtin::BI__builtin_ctzll: {
400     Value *ArgValue = EmitScalarExpr(E->getArg(0));
401 
402     llvm::Type *ArgType = ArgValue->getType();
403     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
404 
405     llvm::Type *ResultType = ConvertType(E->getType());
406     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
407     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
408     if (Result->getType() != ResultType)
409       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
410                                      "cast");
411     return RValue::get(Result);
412   }
413   case Builtin::BI__builtin_clzs:
414   case Builtin::BI__builtin_clz:
415   case Builtin::BI__builtin_clzl:
416   case Builtin::BI__builtin_clzll: {
417     Value *ArgValue = EmitScalarExpr(E->getArg(0));
418 
419     llvm::Type *ArgType = ArgValue->getType();
420     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
421 
422     llvm::Type *ResultType = ConvertType(E->getType());
423     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
424     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
425     if (Result->getType() != ResultType)
426       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
427                                      "cast");
428     return RValue::get(Result);
429   }
430   case Builtin::BI__builtin_ffs:
431   case Builtin::BI__builtin_ffsl:
432   case Builtin::BI__builtin_ffsll: {
433     // ffs(x) -> x ? cttz(x) + 1 : 0
434     Value *ArgValue = EmitScalarExpr(E->getArg(0));
435 
436     llvm::Type *ArgType = ArgValue->getType();
437     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
438 
439     llvm::Type *ResultType = ConvertType(E->getType());
440     Value *Tmp =
441         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
442                           llvm::ConstantInt::get(ArgType, 1));
443     Value *Zero = llvm::Constant::getNullValue(ArgType);
444     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
445     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
446     if (Result->getType() != ResultType)
447       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
448                                      "cast");
449     return RValue::get(Result);
450   }
451   case Builtin::BI__builtin_parity:
452   case Builtin::BI__builtin_parityl:
453   case Builtin::BI__builtin_parityll: {
454     // parity(x) -> ctpop(x) & 1
455     Value *ArgValue = EmitScalarExpr(E->getArg(0));
456 
457     llvm::Type *ArgType = ArgValue->getType();
458     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
459 
460     llvm::Type *ResultType = ConvertType(E->getType());
461     Value *Tmp = Builder.CreateCall(F, ArgValue);
462     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
463     if (Result->getType() != ResultType)
464       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
465                                      "cast");
466     return RValue::get(Result);
467   }
468   case Builtin::BI__builtin_popcount:
469   case Builtin::BI__builtin_popcountl:
470   case Builtin::BI__builtin_popcountll: {
471     Value *ArgValue = EmitScalarExpr(E->getArg(0));
472 
473     llvm::Type *ArgType = ArgValue->getType();
474     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
475 
476     llvm::Type *ResultType = ConvertType(E->getType());
477     Value *Result = Builder.CreateCall(F, ArgValue);
478     if (Result->getType() != ResultType)
479       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
480                                      "cast");
481     return RValue::get(Result);
482   }
483   case Builtin::BI__builtin_unpredictable: {
484     // Always return the argument of __builtin_unpredictable. LLVM does not
485     // handle this builtin. Metadata for this builtin should be added directly
486     // to instructions such as branches or switches that use it.
487     return RValue::get(EmitScalarExpr(E->getArg(0)));
488   }
489   case Builtin::BI__builtin_expect: {
490     Value *ArgValue = EmitScalarExpr(E->getArg(0));
491     llvm::Type *ArgType = ArgValue->getType();
492 
493     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
494     // Don't generate llvm.expect on -O0 as the backend won't use it for
495     // anything.
496     // Note, we still IRGen ExpectedValue because it could have side-effects.
497     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
498       return RValue::get(ArgValue);
499 
500     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
501     Value *Result =
502         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
503     return RValue::get(Result);
504   }
505   case Builtin::BI__builtin_assume_aligned: {
506     Value *PtrValue = EmitScalarExpr(E->getArg(0));
507     Value *OffsetValue =
508       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
509 
510     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
511     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
512     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
513 
514     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
515     return RValue::get(PtrValue);
516   }
517   case Builtin::BI__assume:
518   case Builtin::BI__builtin_assume: {
519     if (E->getArg(0)->HasSideEffects(getContext()))
520       return RValue::get(nullptr);
521 
522     Value *ArgValue = EmitScalarExpr(E->getArg(0));
523     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
524     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
525   }
526   case Builtin::BI__builtin_bswap16:
527   case Builtin::BI__builtin_bswap32:
528   case Builtin::BI__builtin_bswap64: {
529     Value *ArgValue = EmitScalarExpr(E->getArg(0));
530     llvm::Type *ArgType = ArgValue->getType();
531     Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
532     return RValue::get(Builder.CreateCall(F, ArgValue));
533   }
534   case Builtin::BI__builtin_object_size: {
535     // We rely on constant folding to deal with expressions with side effects.
536     assert(!E->getArg(0)->HasSideEffects(getContext()) &&
537            "should have been constant folded");
538 
539     // We pass this builtin onto the optimizer so that it can
540     // figure out the object size in more complex cases.
541     llvm::Type *ResType = ConvertType(E->getType());
542 
543     // LLVM only supports 0 and 2, make sure that we pass along that
544     // as a boolean.
545     Value *Ty = EmitScalarExpr(E->getArg(1));
546     ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
547     assert(CI);
548     uint64_t val = CI->getZExtValue();
549     CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
550     // FIXME: Get right address space.
551     llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
552     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
553     return RValue::get(
554         Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI}));
555   }
556   case Builtin::BI__builtin_prefetch: {
557     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
558     // FIXME: Technically these constants should of type 'int', yes?
559     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
560       llvm::ConstantInt::get(Int32Ty, 0);
561     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
562       llvm::ConstantInt::get(Int32Ty, 3);
563     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
564     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
565     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
566   }
567   case Builtin::BI__builtin_readcyclecounter: {
568     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
569     return RValue::get(Builder.CreateCall(F));
570   }
571   case Builtin::BI__builtin___clear_cache: {
572     Value *Begin = EmitScalarExpr(E->getArg(0));
573     Value *End = EmitScalarExpr(E->getArg(1));
574     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
575     return RValue::get(Builder.CreateCall(F, {Begin, End}));
576   }
577   case Builtin::BI__builtin_trap:
578     return RValue::get(EmitTrapCall(Intrinsic::trap));
579   case Builtin::BI__debugbreak:
580     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
581   case Builtin::BI__builtin_unreachable: {
582     if (SanOpts.has(SanitizerKind::Unreachable)) {
583       SanitizerScope SanScope(this);
584       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
585                                SanitizerKind::Unreachable),
586                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
587                 None);
588     } else
589       Builder.CreateUnreachable();
590 
591     // We do need to preserve an insertion point.
592     EmitBlock(createBasicBlock("unreachable.cont"));
593 
594     return RValue::get(nullptr);
595   }
596 
597   case Builtin::BI__builtin_powi:
598   case Builtin::BI__builtin_powif:
599   case Builtin::BI__builtin_powil: {
600     Value *Base = EmitScalarExpr(E->getArg(0));
601     Value *Exponent = EmitScalarExpr(E->getArg(1));
602     llvm::Type *ArgType = Base->getType();
603     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
604     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
605   }
606 
607   case Builtin::BI__builtin_isgreater:
608   case Builtin::BI__builtin_isgreaterequal:
609   case Builtin::BI__builtin_isless:
610   case Builtin::BI__builtin_islessequal:
611   case Builtin::BI__builtin_islessgreater:
612   case Builtin::BI__builtin_isunordered: {
613     // Ordered comparisons: we know the arguments to these are matching scalar
614     // floating point values.
615     Value *LHS = EmitScalarExpr(E->getArg(0));
616     Value *RHS = EmitScalarExpr(E->getArg(1));
617 
618     switch (BuiltinID) {
619     default: llvm_unreachable("Unknown ordered comparison");
620     case Builtin::BI__builtin_isgreater:
621       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
622       break;
623     case Builtin::BI__builtin_isgreaterequal:
624       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
625       break;
626     case Builtin::BI__builtin_isless:
627       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
628       break;
629     case Builtin::BI__builtin_islessequal:
630       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
631       break;
632     case Builtin::BI__builtin_islessgreater:
633       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
634       break;
635     case Builtin::BI__builtin_isunordered:
636       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
637       break;
638     }
639     // ZExt bool to int type.
640     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
641   }
642   case Builtin::BI__builtin_isnan: {
643     Value *V = EmitScalarExpr(E->getArg(0));
644     V = Builder.CreateFCmpUNO(V, V, "cmp");
645     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
646   }
647 
648   case Builtin::BI__builtin_isinf: {
649     // isinf(x) --> fabs(x) == infinity
650     Value *V = EmitScalarExpr(E->getArg(0));
651     V = EmitFAbs(*this, V);
652 
653     V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
654     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
655   }
656 
657   case Builtin::BI__builtin_isinf_sign: {
658     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
659     Value *Arg = EmitScalarExpr(E->getArg(0));
660     Value *AbsArg = EmitFAbs(*this, Arg);
661     Value *IsInf = Builder.CreateFCmpOEQ(
662         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
663     Value *IsNeg = EmitSignBit(*this, Arg);
664 
665     llvm::Type *IntTy = ConvertType(E->getType());
666     Value *Zero = Constant::getNullValue(IntTy);
667     Value *One = ConstantInt::get(IntTy, 1);
668     Value *NegativeOne = ConstantInt::get(IntTy, -1);
669     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
670     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
671     return RValue::get(Result);
672   }
673 
674   case Builtin::BI__builtin_isnormal: {
675     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
676     Value *V = EmitScalarExpr(E->getArg(0));
677     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
678 
679     Value *Abs = EmitFAbs(*this, V);
680     Value *IsLessThanInf =
681       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
682     APFloat Smallest = APFloat::getSmallestNormalized(
683                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
684     Value *IsNormal =
685       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
686                             "isnormal");
687     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
688     V = Builder.CreateAnd(V, IsNormal, "and");
689     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
690   }
691 
692   case Builtin::BI__builtin_isfinite: {
693     // isfinite(x) --> x == x && fabs(x) != infinity;
694     Value *V = EmitScalarExpr(E->getArg(0));
695     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
696 
697     Value *Abs = EmitFAbs(*this, V);
698     Value *IsNotInf =
699       Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
700 
701     V = Builder.CreateAnd(Eq, IsNotInf, "and");
702     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
703   }
704 
705   case Builtin::BI__builtin_fpclassify: {
        // Classifies the value operand (argument 5) through a chain of tests.
        // Arguments 0-4 are the results to produce for, in order: NaN,
        // infinity, normal, subnormal and zero.  Every test that hits branches
        // straight to the end block, where a PHI picks the matching result.
706     Value *V = EmitScalarExpr(E->getArg(5));
707     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
708
709     // Create Result
        // The PHI is created in the end block before any test is emitted so
        // that each test below can register its incoming value right away.
710     BasicBlock *Begin = Builder.GetInsertBlock();
711     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
712     Builder.SetInsertPoint(End);
713     PHINode *Result =
714       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
715                         "fpclassify_result");
716
717     // if (V==0) return FP_ZERO
718     Builder.SetInsertPoint(Begin);
719     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
720                                           "iszero");
721     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
722     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
723     Builder.CreateCondBr(IsZero, End, NotZero);
724     Result->addIncoming(ZeroLiteral, Begin);
725
726     // if (V != V) return FP_NAN
727     Builder.SetInsertPoint(NotZero);
728     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
729     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
730     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
731     Builder.CreateCondBr(IsNan, End, NotNan);
732     Result->addIncoming(NanLiteral, NotZero);
733
734     // if (fabs(V) == infinity) return FP_INFINITY
735     Builder.SetInsertPoint(NotNan);
736     Value *VAbs = EmitFAbs(*this, V);
737     Value *IsInf =
738       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
739                             "isinf");
740     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
741     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
742     Builder.CreateCondBr(IsInf, End, NotInf);
743     Result->addIncoming(InfLiteral, NotNan);
744
745     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
        // This last step needs no extra block: a select chooses between the
        // two remaining results and the block branches to the end
        // unconditionally.
746     Builder.SetInsertPoint(NotInf);
747     APFloat Smallest = APFloat::getSmallestNormalized(
748         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
749     Value *IsNormal =
750       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
751                             "isnormal");
752     Value *NormalResult =
753       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
754                            EmitScalarExpr(E->getArg(3)));
755     Builder.CreateBr(End);
756     Result->addIncoming(NormalResult, NotInf);
757
758     // return Result
        // Leave the builder positioned in the end block for following code.
759     Builder.SetInsertPoint(End);
760     return RValue::get(Result);
761   }
762 
763   case Builtin::BIalloca:
764   case Builtin::BI_alloca:
765   case Builtin::BI__builtin_alloca: {
        // Lowers alloca(size) to a dynamically sized alloca of i8 and returns
        // the resulting pointer.
766     Value *Size = EmitScalarExpr(E->getArg(0));
        // The caller may store any object in the buffer, so the natural
        // alignment of i8 (1) is not enough.  Use the target's suitable
        // alignment, i.e. the value behind __BIGGEST_ALIGNMENT__.
        unsigned SuitableAlignmentInBytes =
            getContext()
                .toCharUnitsFromBits(
                    getContext().getTargetInfo().getSuitableAlign())
                .getQuantity();
        AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
        AI->setAlignment(SuitableAlignmentInBytes);
767     return RValue::get(AI);
768   }
769   case Builtin::BIbzero:
770   case Builtin::BI__builtin_bzero: {
        // Lowers bzero(dest, size) to a memset of zero bytes, after running
        // the destination pointer through the nonnull-argument check.
771     Address Dest = EmitPointerWithAlignment(E->getArg(0));
772     Value *SizeVal = EmitScalarExpr(E->getArg(1));
773     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
774                         E->getArg(0)->getExprLoc(), FD, 0);
775     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
        // bzero returns void, so there is no value to hand back.  Use the
        // same null RValue as the other void builtins in this switch instead
        // of presenting the destination pointer as the call's result.
776     return RValue::get(nullptr);
777   }
778   case Builtin::BImemcpy:
779   case Builtin::BI__builtin_memcpy: {
        // memcpy(dst, src, n): emit the operands in argument order, run both
        // pointers through the nonnull-argument check, emit the copy and
        // yield the destination pointer.
780     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
781     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
782     Value *NumBytes = EmitScalarExpr(E->getArg(2));
783     EmitNonNullArgCheck(RValue::get(DstAddr.getPointer()),
784                         E->getArg(0)->getType(), E->getArg(0)->getExprLoc(),
785                         FD, 0);
786     EmitNonNullArgCheck(RValue::get(SrcAddr.getPointer()),
787                         E->getArg(1)->getType(), E->getArg(1)->getExprLoc(),
                            FD, 1);
788     Builder.CreateMemCpy(DstAddr, SrcAddr, NumBytes, false);
        return RValue::get(DstAddr.getPointer());
789   }
790 
791   case Builtin::BI__builtin___memcpy_chk: {
792     // Only the statically provable case is folded here: when both the copy
793     // length and the destination size evaluate to constants and cst1 <= cst2,
        // __builtin_memcpy_chk(x, y, cst1, cst2) is emitted as a plain memcpy.
        // Every other call leaves the switch through break.
794     llvm::APSInt CopyLen, DstCapacity;
795     bool Foldable = E->getArg(2)->EvaluateAsInt(CopyLen, CGM.getContext()) &&
796                     E->getArg(3)->EvaluateAsInt(DstCapacity, CGM.getContext());
797     if (!Foldable || CopyLen.ugt(DstCapacity))
798       break;
799     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
800     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
801     Value *NumBytes = llvm::ConstantInt::get(Builder.getContext(), CopyLen);
802     Builder.CreateMemCpy(DstAddr, SrcAddr, NumBytes, false);
803     return RValue::get(DstAddr.getPointer());
804   }
805 
806   case Builtin::BI__builtin_objc_memmove_collectable: {
        // The move itself is delegated to the Objective-C runtime object's
        // EmitGCMemmoveCollectable; the destination pointer is the result.
807     Address Dst = EmitPointerWithAlignment(E->getArg(0));
808     Address Src = EmitPointerWithAlignment(E->getArg(1));
809     Value *NumBytes = EmitScalarExpr(E->getArg(2));
810     CGObjCRuntime &ObjCRT = CGM.getObjCRuntime();
811     ObjCRT.EmitGCMemmoveCollectable(*this, Dst, Src, NumBytes);
812     return RValue::get(Dst.getPointer());
813   }
814 
815   case Builtin::BI__builtin___memmove_chk: {
816     // Only the statically provable case is folded here: when both the move
817     // length and the destination size evaluate to constants and cst1 <= cst2,
        // __builtin_memmove_chk(x, y, cst1, cst2) is emitted as a plain
        // memmove.  Every other call leaves the switch through break.
818     llvm::APSInt MoveLen, DstCapacity;
819     bool Foldable = E->getArg(2)->EvaluateAsInt(MoveLen, CGM.getContext()) &&
820                     E->getArg(3)->EvaluateAsInt(DstCapacity, CGM.getContext());
821     if (!Foldable || MoveLen.ugt(DstCapacity))
822       break;
823     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
824     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
825     Value *NumBytes = llvm::ConstantInt::get(Builder.getContext(), MoveLen);
826     Builder.CreateMemMove(DstAddr, SrcAddr, NumBytes, false);
827     return RValue::get(DstAddr.getPointer());
828   }
829 
830   case Builtin::BImemmove:
831   case Builtin::BI__builtin_memmove: {
        // memmove(dst, src, n): emit the operands in argument order, run both
        // pointers through the nonnull-argument check, emit the move and
        // yield the destination pointer.
832     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
833     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
834     Value *NumBytes = EmitScalarExpr(E->getArg(2));
835     EmitNonNullArgCheck(RValue::get(DstAddr.getPointer()),
836                         E->getArg(0)->getType(), E->getArg(0)->getExprLoc(),
837                         FD, 0);
838     EmitNonNullArgCheck(RValue::get(SrcAddr.getPointer()),
839                         E->getArg(1)->getType(), E->getArg(1)->getExprLoc(),
                            FD, 1);
840     Builder.CreateMemMove(DstAddr, SrcAddr, NumBytes, false);
        return RValue::get(DstAddr.getPointer());
841   }
842   case Builtin::BImemset:
843   case Builtin::BI__builtin_memset: {
        // memset(dst, c, n): the fill value is truncated to i8 before use, the
        // destination goes through the nonnull-argument check, and the
        // destination pointer is the result.
844     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
845     Value *FillInt = EmitScalarExpr(E->getArg(1));
846     Value *FillByte = Builder.CreateTrunc(FillInt, Builder.getInt8Ty());
847     Value *NumBytes = EmitScalarExpr(E->getArg(2));
848     EmitNonNullArgCheck(RValue::get(DstAddr.getPointer()),
849                         E->getArg(0)->getType(), E->getArg(0)->getExprLoc(),
                            FD, 0);
850     Builder.CreateMemSet(DstAddr, FillByte, NumBytes, false);
851     return RValue::get(DstAddr.getPointer());
852   }
853   case Builtin::BI__builtin___memset_chk: {
854     // Only the statically provable case is folded here: when both the fill
855     // length and the destination size evaluate to constants and cst1 <= cst2,
        // __builtin_memset_chk(x, y, cst1, cst2) is emitted as a plain memset.
        // Every other call leaves the switch through break.
856     llvm::APSInt FillLen, DstCapacity;
857     bool Foldable = E->getArg(2)->EvaluateAsInt(FillLen, CGM.getContext()) &&
858                     E->getArg(3)->EvaluateAsInt(DstCapacity, CGM.getContext());
859     if (!Foldable || FillLen.ugt(DstCapacity))
860       break;
861     Address DstAddr = EmitPointerWithAlignment(E->getArg(0));
862     Value *FillInt = EmitScalarExpr(E->getArg(1));
863     Value *FillByte = Builder.CreateTrunc(FillInt, Builder.getInt8Ty());
864     Value *NumBytes = llvm::ConstantInt::get(Builder.getContext(), FillLen);
865     Builder.CreateMemSet(DstAddr, FillByte, NumBytes, false);
866     return RValue::get(DstAddr.getPointer());
867   }
868   case Builtin::BI__builtin_dwarf_cfa: {
        // Emits a call to the eh_dwarf_cfa intrinsic with a constant i32
        // offset and returns the call's value.
        //
869     // The offset in bytes from the first argument to the CFA.
870     //
871     // Why on earth is this in the frontend?  Is there any reason at
872     // all that the backend can't reasonably determine this while
873     // lowering llvm.eh.dwarf.cfa()?
874     //
875     // TODO: If there's a satisfactory reason, add a target hook for
876     // this instead of hard-coding 0, which is correct for most targets.
877     int32_t Offset = 0;
878
879     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
880     return RValue::get(Builder.CreateCall(F,
881                                       llvm::ConstantInt::get(Int32Ty, Offset)));
882   }
883   case Builtin::BI__builtin_return_address: {
        // The depth operand is emitted through EmitConstantExpr as an
        // unsigned int and handed to the returnaddress intrinsic.
884     Value *FrameDepth =
885         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
886     Value *RetAddrFn = CGM.getIntrinsic(Intrinsic::returnaddress);
887     return RValue::get(Builder.CreateCall(RetAddrFn, FrameDepth));
888   }
889   case Builtin::BI__builtin_frame_address: {
        // The depth operand is emitted through EmitConstantExpr as an
        // unsigned int and handed to the frameaddress intrinsic.
890     Value *FrameDepth =
891         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
892     Value *FrameAddrFn = CGM.getIntrinsic(Intrinsic::frameaddress);
893     return RValue::get(Builder.CreateCall(FrameAddrFn, FrameDepth));
894   }
895   case Builtin::BI__builtin_extract_return_addr: {
        // The target hooks turn the emitted operand into the result.
896     Value *Encoded = EmitScalarExpr(E->getArg(0));
897     return RValue::get(getTargetHooks().decodeReturnAddress(*this, Encoded));
898   }
900   case Builtin::BI__builtin_frob_return_addr: {
        // The target hooks turn the emitted operand into the result.
901     Value *Plain = EmitScalarExpr(E->getArg(0));
902     return RValue::get(getTargetHooks().encodeReturnAddress(*this, Plain));
903   }
905   case Builtin::BI__builtin_dwarf_sp_column: {
        // Yields the column number reported by the target hooks as a signed
        // constant of the builtin's result type.
906     llvm::IntegerType *ResultTy =
907         cast<llvm::IntegerType>(ConvertType(E->getType()));
908     int SPColumn = getTargetHooks().getDwarfEHStackPointer(CGM);
909     if (SPColumn != -1)
910       return RValue::get(llvm::ConstantInt::get(ResultTy, SPColumn, true));
        // A column of -1 is reported as unsupported and produces undef.
911     CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
912     return RValue::get(llvm::UndefValue::get(ResultTy));
913   }
915   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
        // The target hooks fill the table; a true return from the hook is
        // reported as unsupported.  The builtin's own result is undef.
916     Value *Table = EmitScalarExpr(E->getArg(0));
917     bool HookFailed = getTargetHooks().initDwarfEHRegSizeTable(*this, Table);
918     if (HookFailed)
919       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
        return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
920   }
921   case Builtin::BI__builtin_eh_return: {
        // Calls the 32- or 64-bit eh_return intrinsic, selected by the bit
        // width of the first (integer) argument, and then terminates the
        // current block with unreachable.  No value is produced.
922     Value *Int = EmitScalarExpr(E->getArg(0));
923     Value *Ptr = EmitScalarExpr(E->getArg(1));
924
925     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
926     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
927            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
928     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
929                                   ? Intrinsic::eh_return_i32
930                                   : Intrinsic::eh_return_i64);
931     Builder.CreateCall(F, {Int, Ptr});
932     Builder.CreateUnreachable();
933
934     // We do need to preserve an insertion point.
        // The block above now ends in unreachable, so a fresh block is opened
        // for whatever is emitted next.
935     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
936
937     return RValue::get(nullptr);
938   }
939   case Builtin::BI__builtin_unwind_init: {
        // A plain, argument-less call to the eh_unwind_init intrinsic.
940     Value *UnwindInitFn = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
941     return RValue::get(Builder.CreateCall(UnwindInitFn));
942   }
943   case Builtin::BI__builtin_extend_pointer: {
944     // Widens a pointer to an _Unwind_Word, which is uint64_t on all
945     // platforms.  The result usually lands in a register and is later used
946     // as an address; when addressing registers are wider than pointers and
947     // the platform does not implicitly ignore the high-order bits, the
948     // choice between zext and sext has to follow the platform's
949     // expectations.
950     //
951     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
952
953     // Start from the pointer converted to intptr_t.
954     Value *PtrVal = EmitScalarExpr(E->getArg(0));
955     Value *AsInt = Builder.CreatePtrToInt(PtrVal, IntPtrTy, "extend.cast");
956
957     // A 64-bit intptr_t needs no widening.
958     if (IntPtrTy->getBitWidth() == 64)
959       return RValue::get(AsInt);
960
961     // Narrower: the target hooks pick sign- versus zero-extension.
962     Value *Widened = getTargetHooks().extendPointerWithSExt()
963                          ? Builder.CreateSExt(AsInt, Int64Ty, "extend.sext")
964                          : Builder.CreateZExt(AsInt, Int64Ty, "extend.zext");
965     return RValue::get(Widened);
966   }
968   case Builtin::BI__builtin_setjmp: {
        // Fills in two slots of the caller's buffer (the frame address at the
        // buffer's start and the saved stack pointer at index 2) and then
        // calls the eh_sjlj_setjmp intrinsic, whose value is the result.
969     // Buffer is a void**.
970     Address Buf = EmitPointerWithAlignment(E->getArg(0));
971
972     // Store the frame pointer to the setjmp buffer.
        // frameaddress is called with a constant 0 argument.
973     Value *FrameAddr =
974       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
975                          ConstantInt::get(Int32Ty, 0));
976     Builder.CreateStore(FrameAddr, Buf);
977
978     // Store the stack pointer to the setjmp buffer.
979     Value *StackAddr =
980         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
981     Address StackSaveSlot =
982       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
983     Builder.CreateStore(StackAddr, StackSaveSlot);
984
985     // Call LLVM's EH setjmp, which is lightweight.
        // The intrinsic is passed the buffer cast to Int8PtrTy.
986     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
987     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
988     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
989   }
990   case Builtin::BI__builtin_longjmp: {
        // Calls the eh_sjlj_longjmp intrinsic on the buffer (cast to
        // Int8PtrTy) and ends the current block with unreachable.  No value
        // is produced.
991     Value *Buf = EmitScalarExpr(E->getArg(0));
992     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
993
994     // Call LLVM's EH longjmp, which is lightweight.
995     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
996
997     // longjmp doesn't return; mark this as unreachable.
998     Builder.CreateUnreachable();
999
1000     // We do need to preserve an insertion point.
         // The block above now ends in unreachable, so a fresh block is opened
         // for whatever is emitted next.
1001     EmitBlock(createBasicBlock("longjmp.cont"));
1002
1003     return RValue::get(nullptr);
1004   }
1005   case Builtin::BI__sync_fetch_and_add:
1006   case Builtin::BI__sync_fetch_and_sub:
1007   case Builtin::BI__sync_fetch_and_or:
1008   case Builtin::BI__sync_fetch_and_and:
1009   case Builtin::BI__sync_fetch_and_xor:
1010   case Builtin::BI__sync_fetch_and_nand:
1011   case Builtin::BI__sync_add_and_fetch:
1012   case Builtin::BI__sync_sub_and_fetch:
1013   case Builtin::BI__sync_and_and_fetch:
1014   case Builtin::BI__sync_or_and_fetch:
1015   case Builtin::BI__sync_xor_and_fetch:
1016   case Builtin::BI__sync_nand_and_fetch:
1017   case Builtin::BI__sync_val_compare_and_swap:
1018   case Builtin::BI__sync_bool_compare_and_swap:
1019   case Builtin::BI__sync_lock_test_and_set:
1020   case Builtin::BI__sync_lock_release:
1021   case Builtin::BI__sync_swap:
         // These are the __sync builtins without a size suffix.  Reaching
         // code generation with one of them is treated as an internal error;
         // per the message, Sema is expected not to let them through
         // (presumably by rewriting them to the size-suffixed forms handled
         // below -- confirm in Sema).
1022     llvm_unreachable("Shouldn't make it through sema");
1023   case Builtin::BI__sync_fetch_and_add_1:
1024   case Builtin::BI__sync_fetch_and_add_2:
1025   case Builtin::BI__sync_fetch_and_add_4:
1026   case Builtin::BI__sync_fetch_and_add_8:
1027   case Builtin::BI__sync_fetch_and_add_16:
         // __sync_fetch_and_<op>_N: all size suffixes of one operation share a
         // single EmitBinaryAtomic call; only the AtomicRMWInst operation
         // differs between the groups below.
1028     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1029   case Builtin::BI__sync_fetch_and_sub_1:
1030   case Builtin::BI__sync_fetch_and_sub_2:
1031   case Builtin::BI__sync_fetch_and_sub_4:
1032   case Builtin::BI__sync_fetch_and_sub_8:
1033   case Builtin::BI__sync_fetch_and_sub_16:
1034     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1035   case Builtin::BI__sync_fetch_and_or_1:
1036   case Builtin::BI__sync_fetch_and_or_2:
1037   case Builtin::BI__sync_fetch_and_or_4:
1038   case Builtin::BI__sync_fetch_and_or_8:
1039   case Builtin::BI__sync_fetch_and_or_16:
1040     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1041   case Builtin::BI__sync_fetch_and_and_1:
1042   case Builtin::BI__sync_fetch_and_and_2:
1043   case Builtin::BI__sync_fetch_and_and_4:
1044   case Builtin::BI__sync_fetch_and_and_8:
1045   case Builtin::BI__sync_fetch_and_and_16:
1046     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1047   case Builtin::BI__sync_fetch_and_xor_1:
1048   case Builtin::BI__sync_fetch_and_xor_2:
1049   case Builtin::BI__sync_fetch_and_xor_4:
1050   case Builtin::BI__sync_fetch_and_xor_8:
1051   case Builtin::BI__sync_fetch_and_xor_16:
1052     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1053   case Builtin::BI__sync_fetch_and_nand_1:
1054   case Builtin::BI__sync_fetch_and_nand_2:
1055   case Builtin::BI__sync_fetch_and_nand_4:
1056   case Builtin::BI__sync_fetch_and_nand_8:
1057   case Builtin::BI__sync_fetch_and_nand_16:
1058     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1059 
1060   // Clang extensions: not overloaded yet.
       // Signed and unsigned min/max each map to their own AtomicRMWInst
       // operation; unlike the groups above there are no size-suffixed cases.
1061   case Builtin::BI__sync_fetch_and_min:
1062     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1063   case Builtin::BI__sync_fetch_and_max:
1064     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1065   case Builtin::BI__sync_fetch_and_umin:
1066     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1067   case Builtin::BI__sync_fetch_and_umax:
1068     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1069 
1070   case Builtin::BI__sync_add_and_fetch_1:
1071   case Builtin::BI__sync_add_and_fetch_2:
1072   case Builtin::BI__sync_add_and_fetch_4:
1073   case Builtin::BI__sync_add_and_fetch_8:
1074   case Builtin::BI__sync_add_and_fetch_16:
         // __sync_<op>_and_fetch_N: EmitBinaryAtomicPost receives the atomic
         // operation together with the corresponding ordinary binary opcode.
         // NOTE(review): presumably the binary opcode is used to recompute the
         // post-operation value from the fetched one -- confirm against
         // EmitBinaryAtomicPost.
1075     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1076                                 llvm::Instruction::Add);
1077   case Builtin::BI__sync_sub_and_fetch_1:
1078   case Builtin::BI__sync_sub_and_fetch_2:
1079   case Builtin::BI__sync_sub_and_fetch_4:
1080   case Builtin::BI__sync_sub_and_fetch_8:
1081   case Builtin::BI__sync_sub_and_fetch_16:
1082     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1083                                 llvm::Instruction::Sub);
1084   case Builtin::BI__sync_and_and_fetch_1:
1085   case Builtin::BI__sync_and_and_fetch_2:
1086   case Builtin::BI__sync_and_and_fetch_4:
1087   case Builtin::BI__sync_and_and_fetch_8:
1088   case Builtin::BI__sync_and_and_fetch_16:
1089     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1090                                 llvm::Instruction::And);
1091   case Builtin::BI__sync_or_and_fetch_1:
1092   case Builtin::BI__sync_or_and_fetch_2:
1093   case Builtin::BI__sync_or_and_fetch_4:
1094   case Builtin::BI__sync_or_and_fetch_8:
1095   case Builtin::BI__sync_or_and_fetch_16:
1096     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1097                                 llvm::Instruction::Or);
1098   case Builtin::BI__sync_xor_and_fetch_1:
1099   case Builtin::BI__sync_xor_and_fetch_2:
1100   case Builtin::BI__sync_xor_and_fetch_4:
1101   case Builtin::BI__sync_xor_and_fetch_8:
1102   case Builtin::BI__sync_xor_and_fetch_16:
1103     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1104                                 llvm::Instruction::Xor);
1105   case Builtin::BI__sync_nand_and_fetch_1:
1106   case Builtin::BI__sync_nand_and_fetch_2:
1107   case Builtin::BI__sync_nand_and_fetch_4:
1108   case Builtin::BI__sync_nand_and_fetch_8:
1109   case Builtin::BI__sync_nand_and_fetch_16:
         // nand is the only group that pairs its atomic operation with a
         // different binary opcode (And) and passes an extra `true`.
         // NOTE(review): presumably the flag asks for the And result to be
         // inverted -- confirm against EmitBinaryAtomicPost.
1110     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1111                                 llvm::Instruction::And, true);
1112 
1113   case Builtin::BI__sync_val_compare_and_swap_1:
1114   case Builtin::BI__sync_val_compare_and_swap_2:
1115   case Builtin::BI__sync_val_compare_and_swap_4:
1116   case Builtin::BI__sync_val_compare_and_swap_8:
1117   case Builtin::BI__sync_val_compare_and_swap_16:
         // The val and bool flavours share MakeAtomicCmpXchgValue and differ
         // only in the trailing flag (false here, true below).
         // NOTE(review): presumably the flag selects returning the success
         // bit instead of the old value -- confirm against
         // MakeAtomicCmpXchgValue.
1118     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1119
1120   case Builtin::BI__sync_bool_compare_and_swap_1:
1121   case Builtin::BI__sync_bool_compare_and_swap_2:
1122   case Builtin::BI__sync_bool_compare_and_swap_4:
1123   case Builtin::BI__sync_bool_compare_and_swap_8:
1124   case Builtin::BI__sync_bool_compare_and_swap_16:
1125     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1126 
1127   case Builtin::BI__sync_swap_1:
1128   case Builtin::BI__sync_swap_2:
1129   case Builtin::BI__sync_swap_4:
1130   case Builtin::BI__sync_swap_8:
1131   case Builtin::BI__sync_swap_16:
         // __sync_swap_N and __sync_lock_test_and_set_N (below) are emitted
         // identically: both are an EmitBinaryAtomic with the Xchg operation.
1132     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1133
1134   case Builtin::BI__sync_lock_test_and_set_1:
1135   case Builtin::BI__sync_lock_test_and_set_2:
1136   case Builtin::BI__sync_lock_test_and_set_4:
1137   case Builtin::BI__sync_lock_test_and_set_8:
1138   case Builtin::BI__sync_lock_test_and_set_16:
1139     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1140 
1141   case Builtin::BI__sync_lock_release_1:
1142   case Builtin::BI__sync_lock_release_2:
1143   case Builtin::BI__sync_lock_release_4:
1144   case Builtin::BI__sync_lock_release_8:
1145   case Builtin::BI__sync_lock_release_16: {
         // Emits an atomic store of zero with release ordering.  The store
         // goes through an integer type as wide as the pointee type, and the
         // pointee's size is also passed as the store's alignment.
1146     Value *LockAddr = EmitScalarExpr(E->getArg(0));
1147     QualType LockTy = E->getArg(0)->getType()->getPointeeType();
1148     CharUnits LockSize = getContext().getTypeSizeInChars(LockTy);
1149     llvm::Type *LockIntTy =
1150         llvm::IntegerType::get(getLLVMContext(), LockSize.getQuantity() * 8);
1151     LockAddr = Builder.CreateBitCast(LockAddr, LockIntTy->getPointerTo());
1152     llvm::Constant *Zero = llvm::Constant::getNullValue(LockIntTy);
1153     llvm::StoreInst *ReleaseStore =
1154         Builder.CreateAlignedStore(Zero, LockAddr, LockSize);
1155     ReleaseStore->setAtomic(llvm::Release);
1156     return RValue::get(nullptr);
1157   }
1158 
1159   case Builtin::BI__sync_synchronize: {
         // Emits a single sequentially-consistent fence; no value is produced.
         //
1160     // We assume this is supposed to correspond to a C++0x-style
1161     // sequentially-consistent fence (i.e. this is only usable for
1162     // synchronization, not device I/O or anything like that). This intrinsic
1163     // is really badly designed in the sense that in theory, there isn't
1164     // any way to safely use it... but in practice, it mostly works
1165     // to use it with non-atomic loads and stores to get acquire/release
1166     // semantics.
1167     Builder.CreateFence(llvm::SequentiallyConsistent);
1168     return RValue::get(nullptr);
1169   }
1170 
1171   case Builtin::BI__builtin_nontemporal_load:
         // Both nontemporal builtins forward the whole call expression to a
         // helper and wrap the helper's value as the result.
1172     return RValue::get(EmitNontemporalLoad(*this, E));
1173   case Builtin::BI__builtin_nontemporal_store:
1174     return RValue::get(EmitNontemporalStore(*this, E));
1175   case Builtin::BI__c11_atomic_is_lock_free:
1176   case Builtin::BI__atomic_is_lock_free: {
1177     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1178     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1179     // _Atomic(T) is always properly-aligned.
1180     const char *LibCallName = "__atomic_is_lock_free";
1181     CallArgList Args;
         // First argument: the size operand, passed as size_t.
1182     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1183              getContext().getSizeType());
         // Second argument: the caller's pointer for __atomic_is_lock_free, a
         // null pointer for the __c11 form (which has no pointer operand).
1184     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1185       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1186                getContext().VoidPtrTy);
1187     else
1188       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1189                getContext().VoidPtrTy);
         // Arrange the call as an ordinary free function returning the
         // builtin's type, then emit it against a runtime function declared
         // under the library name above.
1190     const CGFunctionInfo &FuncInfo =
1191         CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1192                                                FunctionType::ExtInfo(),
1193                                                RequiredArgs::All);
1194     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1195     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1196     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1197   }
1198 
1199   case Builtin::BI__atomic_test_and_set: {
         // Atomically exchanges the byte at the pointer operand with 1 and
         // yields whether the previous byte was non-zero.  The second operand
         // is the memory order: a constant order produces one atomicrmw, a
         // runtime order produces a switch over one block per ordering.
         //
1200     // Look at the argument type to determine whether this is a volatile
1201     // operation. The parameter type is always volatile.
1202     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1203     bool Volatile =
1204         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1205
         // Access the flag as an i8 in the pointer's own address space.
1206     Value *Ptr = EmitScalarExpr(E->getArg(0));
1207     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1208     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1209     Value *NewVal = Builder.getInt8(1);
1210     Value *Order = EmitScalarExpr(E->getArg(1));
         // Constant memory order: pick the ordering directly.
1211     if (isa<llvm::ConstantInt>(Order)) {
1212       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1213       AtomicRMWInst *Result = nullptr;
1214       switch (ord) {
1215       case 0:  // memory_order_relaxed
1216       default: // invalid order
1217         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1218                                          Ptr, NewVal,
1219                                          llvm::Monotonic);
1220         break;
1221       case 1:  // memory_order_consume
1222       case 2:  // memory_order_acquire
1223         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1224                                          Ptr, NewVal,
1225                                          llvm::Acquire);
1226         break;
1227       case 3:  // memory_order_release
1228         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1229                                          Ptr, NewVal,
1230                                          llvm::Release);
1231         break;
1232       case 4:  // memory_order_acq_rel
1233         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1234                                          Ptr, NewVal,
1235                                          llvm::AcquireRelease);
1236         break;
1237       case 5:  // memory_order_seq_cst
1238         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1239                                          Ptr, NewVal,
1240                                          llvm::SequentiallyConsistent);
1241         break;
1242       }
1243       Result->setVolatile(Volatile);
1244       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1245     }
1246
         // Runtime memory order: one block per ordering, all of which branch
         // to a common continuation block.
1247     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1248
1249     llvm::BasicBlock *BBs[5] = {
1250       createBasicBlock("monotonic", CurFn),
1251       createBasicBlock("acquire", CurFn),
1252       createBasicBlock("release", CurFn),
1253       createBasicBlock("acqrel", CurFn),
1254       createBasicBlock("seqcst", CurFn)
1255     };
         // Orders[i] is the ordering used inside BBs[i].
1256     llvm::AtomicOrdering Orders[5] = {
1257       llvm::Monotonic, llvm::Acquire, llvm::Release,
1258       llvm::AcquireRelease, llvm::SequentiallyConsistent
1259     };
1260
         // The switch defaults to the monotonic block, so order values without
         // an explicit case below are handled like relaxed.
1261     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1262     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1263
         // The PHI in the continuation block collects the old byte from
         // whichever ordering block ran.
1264     Builder.SetInsertPoint(ContBB);
1265     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1266
1267     for (unsigned i = 0; i < 5; ++i) {
1268       Builder.SetInsertPoint(BBs[i]);
1269       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1270                                                    Ptr, NewVal, Orders[i]);
1271       RMW->setVolatile(Volatile);
1272       Result->addIncoming(RMW, BBs[i]);
1273       Builder.CreateBr(ContBB);
1274     }
1275
         // Same value-to-ordering mapping as the constant path above; order
         // values 1 and 2 share the acquire block.
1276     SI->addCase(Builder.getInt32(0), BBs[0]);
1277     SI->addCase(Builder.getInt32(1), BBs[1]);
1278     SI->addCase(Builder.getInt32(2), BBs[1]);
1279     SI->addCase(Builder.getInt32(3), BBs[2]);
1280     SI->addCase(Builder.getInt32(4), BBs[3]);
1281     SI->addCase(Builder.getInt32(5), BBs[4]);
1282
1283     Builder.SetInsertPoint(ContBB);
1284     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1285   }
1286 
1287   case Builtin::BI__atomic_clear: {
         // Stores a zero byte through the pointer operand with the ordering
         // requested by the second operand; no value is produced.  Only
         // relaxed (0), release (3) and seq_cst (5) have explicit cases; every
         // other order value takes the monotonic default.
1288     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1289     bool Volatile =
1290         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1291
         // Access the flag as an i8 in the pointer's own address space.
1292     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1293     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1294     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1295     Value *NewVal = Builder.getInt8(0);
1296     Value *Order = EmitScalarExpr(E->getArg(1));
         // Constant memory order: the store is created first and its ordering
         // is set afterwards.
1297     if (isa<llvm::ConstantInt>(Order)) {
1298       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1299       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1300       switch (ord) {
1301       case 0:  // memory_order_relaxed
1302       default: // invalid order
1303         Store->setOrdering(llvm::Monotonic);
1304         break;
1305       case 3:  // memory_order_release
1306         Store->setOrdering(llvm::Release);
1307         break;
1308       case 5:  // memory_order_seq_cst
1309         Store->setOrdering(llvm::SequentiallyConsistent);
1310         break;
1311       }
1312       return RValue::get(nullptr);
1313     }
1314
         // Runtime memory order: one block per supported ordering, all of
         // which branch to a common continuation block.
1315     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1316
1317     llvm::BasicBlock *BBs[3] = {
1318       createBasicBlock("monotonic", CurFn),
1319       createBasicBlock("release", CurFn),
1320       createBasicBlock("seqcst", CurFn)
1321     };
         // Orders[i] is the ordering used inside BBs[i].
1322     llvm::AtomicOrdering Orders[3] = {
1323       llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1324     };
1325
         // The switch defaults to the monotonic block.
1326     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1327     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1328
1329     for (unsigned i = 0; i < 3; ++i) {
1330       Builder.SetInsertPoint(BBs[i]);
1331       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1332       Store->setOrdering(Orders[i]);
1333       Builder.CreateBr(ContBB);
1334     }
1335
1336     SI->addCase(Builder.getInt32(0), BBs[0]);
1337     SI->addCase(Builder.getInt32(3), BBs[1]);
1338     SI->addCase(Builder.getInt32(5), BBs[2]);
1339
1340     Builder.SetInsertPoint(ContBB);
1341     return RValue::get(nullptr);
1342   }
1343 
1344   case Builtin::BI__atomic_thread_fence:
1345   case Builtin::BI__atomic_signal_fence:
1346   case Builtin::BI__c11_atomic_thread_fence:
1347   case Builtin::BI__c11_atomic_signal_fence: {
         // Emits a fence whose ordering comes from the single operand.  The
         // signal-fence builtins use single-thread scope, the thread-fence
         // builtins cross-thread scope.  No value is produced.
1348     llvm::SynchronizationScope Scope;
1349     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1350         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1351       Scope = llvm::SingleThread;
1352     else
1353       Scope = llvm::CrossThread;
1354     Value *Order = EmitScalarExpr(E->getArg(0));
         // Constant memory order: emit at most one fence.  Relaxed and
         // invalid orders emit nothing at all.
1355     if (isa<llvm::ConstantInt>(Order)) {
1356       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1357       switch (ord) {
1358       case 0:  // memory_order_relaxed
1359       default: // invalid order
1360         break;
1361       case 1:  // memory_order_consume
1362       case 2:  // memory_order_acquire
1363         Builder.CreateFence(llvm::Acquire, Scope);
1364         break;
1365       case 3:  // memory_order_release
1366         Builder.CreateFence(llvm::Release, Scope);
1367         break;
1368       case 4:  // memory_order_acq_rel
1369         Builder.CreateFence(llvm::AcquireRelease, Scope);
1370         break;
1371       case 5:  // memory_order_seq_cst
1372         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1373         break;
1374       }
1375       return RValue::get(nullptr);
1376     }
1377
         // Runtime memory order: one block per fence kind plus a continuation
         // block.
1378     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1379     AcquireBB = createBasicBlock("acquire", CurFn);
1380     ReleaseBB = createBasicBlock("release", CurFn);
1381     AcqRelBB = createBasicBlock("acqrel", CurFn);
1382     SeqCstBB = createBasicBlock("seqcst", CurFn);
1383     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1384
         // The switch defaults to the continuation block, so order values
         // without an explicit case below emit no fence.
1385     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1386     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1387
         // Order values 1 and 2 share the acquire block.
1388     Builder.SetInsertPoint(AcquireBB);
1389     Builder.CreateFence(llvm::Acquire, Scope);
1390     Builder.CreateBr(ContBB);
1391     SI->addCase(Builder.getInt32(1), AcquireBB);
1392     SI->addCase(Builder.getInt32(2), AcquireBB);
1393
1394     Builder.SetInsertPoint(ReleaseBB);
1395     Builder.CreateFence(llvm::Release, Scope);
1396     Builder.CreateBr(ContBB);
1397     SI->addCase(Builder.getInt32(3), ReleaseBB);
1398
1399     Builder.SetInsertPoint(AcqRelBB);
1400     Builder.CreateFence(llvm::AcquireRelease, Scope);
1401     Builder.CreateBr(ContBB);
1402     SI->addCase(Builder.getInt32(4), AcqRelBB);
1403
1404     Builder.SetInsertPoint(SeqCstBB);
1405     Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1406     Builder.CreateBr(ContBB);
1407     SI->addCase(Builder.getInt32(5), SeqCstBB);
1408
1409     Builder.SetInsertPoint(ContBB);
1410     return RValue::get(nullptr);
1411   }
1412 
1413     // Library functions with special handling.
1414   case Builtin::BIsqrt:
1415   case Builtin::BIsqrtf:
1416   case Builtin::BIsqrtl: {
1417     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1418     // in finite- or unsafe-math mode (the intrinsic has different semantics
1419     // for handling negative numbers compared to the library function, so
1420     // -fmath-errno=0 is not enough).
1421     if (!FD->hasAttr<ConstAttr>())
1422       break;
1423     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1424           CGM.getCodeGenOpts().NoNaNsFPMath))
1425       break;
1426     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1427     llvm::Type *ArgType = Arg0->getType();
1428     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1429     return RValue::get(Builder.CreateCall(F, Arg0));
1430   }
1431 
1432   case Builtin::BI__builtin_pow:
1433   case Builtin::BI__builtin_powf:
1434   case Builtin::BI__builtin_powl:
1435   case Builtin::BIpow:
1436   case Builtin::BIpowf:
1437   case Builtin::BIpowl: {
1438     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1439     if (!FD->hasAttr<ConstAttr>())
1440       break;
1441     Value *Base = EmitScalarExpr(E->getArg(0));
1442     Value *Exponent = EmitScalarExpr(E->getArg(1));
1443     llvm::Type *ArgType = Base->getType();
1444     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1445     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1446   }
1447 
1448   case Builtin::BIfma:
1449   case Builtin::BIfmaf:
1450   case Builtin::BIfmal:
1451   case Builtin::BI__builtin_fma:
1452   case Builtin::BI__builtin_fmaf:
1453   case Builtin::BI__builtin_fmal: {
1454     // Rewrite fma to intrinsic.
1455     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1456     llvm::Type *ArgType = FirstArg->getType();
1457     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1458     return RValue::get(
1459         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1460                                EmitScalarExpr(E->getArg(2))}));
1461   }
1462 
1463   case Builtin::BI__builtin_signbit:
1464   case Builtin::BI__builtin_signbitf:
1465   case Builtin::BI__builtin_signbitl: {
1466     return RValue::get(
1467         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1468                            ConvertType(E->getType())));
1469   }
1470   case Builtin::BI__builtin_annotation: {
1471     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1472     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1473                                       AnnVal->getType());
1474 
1475     // Get the annotation string, go through casts. Sema requires this to be a
1476     // non-wide string literal, potentially casted, so the cast<> is safe.
1477     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1478     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1479     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1480   }
1481   case Builtin::BI__builtin_addcb:
1482   case Builtin::BI__builtin_addcs:
1483   case Builtin::BI__builtin_addc:
1484   case Builtin::BI__builtin_addcl:
1485   case Builtin::BI__builtin_addcll:
1486   case Builtin::BI__builtin_subcb:
1487   case Builtin::BI__builtin_subcs:
1488   case Builtin::BI__builtin_subc:
1489   case Builtin::BI__builtin_subcl:
1490   case Builtin::BI__builtin_subcll: {
1491 
1492     // We translate all of these builtins from expressions of the form:
1493     //   int x = ..., y = ..., carryin = ..., carryout, result;
1494     //   result = __builtin_addc(x, y, carryin, &carryout);
1495     //
1496     // to LLVM IR of the form:
1497     //
1498     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1499     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1500     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1501     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1502     //                                                       i32 %carryin)
1503     //   %result = extractvalue {i32, i1} %tmp2, 0
1504     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1505     //   %tmp3 = or i1 %carry1, %carry2
1506     //   %tmp4 = zext i1 %tmp3 to i32
1507     //   store i32 %tmp4, i32* %carryout
1508 
1509     // Scalarize our inputs.
1510     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1511     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1512     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1513     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1514 
1515     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1516     llvm::Intrinsic::ID IntrinsicId;
1517     switch (BuiltinID) {
1518     default: llvm_unreachable("Unknown multiprecision builtin id.");
1519     case Builtin::BI__builtin_addcb:
1520     case Builtin::BI__builtin_addcs:
1521     case Builtin::BI__builtin_addc:
1522     case Builtin::BI__builtin_addcl:
1523     case Builtin::BI__builtin_addcll:
1524       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1525       break;
1526     case Builtin::BI__builtin_subcb:
1527     case Builtin::BI__builtin_subcs:
1528     case Builtin::BI__builtin_subc:
1529     case Builtin::BI__builtin_subcl:
1530     case Builtin::BI__builtin_subcll:
1531       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1532       break;
1533     }
1534 
1535     // Construct our resulting LLVM IR expression.
1536     llvm::Value *Carry1;
1537     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1538                                               X, Y, Carry1);
1539     llvm::Value *Carry2;
1540     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1541                                               Sum1, Carryin, Carry2);
1542     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1543                                                X->getType());
1544     Builder.CreateStore(CarryOut, CarryOutPtr);
1545     return RValue::get(Sum2);
1546   }
1547   case Builtin::BI__builtin_uadd_overflow:
1548   case Builtin::BI__builtin_uaddl_overflow:
1549   case Builtin::BI__builtin_uaddll_overflow:
1550   case Builtin::BI__builtin_usub_overflow:
1551   case Builtin::BI__builtin_usubl_overflow:
1552   case Builtin::BI__builtin_usubll_overflow:
1553   case Builtin::BI__builtin_umul_overflow:
1554   case Builtin::BI__builtin_umull_overflow:
1555   case Builtin::BI__builtin_umulll_overflow:
1556   case Builtin::BI__builtin_sadd_overflow:
1557   case Builtin::BI__builtin_saddl_overflow:
1558   case Builtin::BI__builtin_saddll_overflow:
1559   case Builtin::BI__builtin_ssub_overflow:
1560   case Builtin::BI__builtin_ssubl_overflow:
1561   case Builtin::BI__builtin_ssubll_overflow:
1562   case Builtin::BI__builtin_smul_overflow:
1563   case Builtin::BI__builtin_smull_overflow:
1564   case Builtin::BI__builtin_smulll_overflow: {
1565 
1566     // We translate all of these builtins directly to the relevant llvm IR node.
1567 
1568     // Scalarize our inputs.
1569     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1570     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1571     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1572 
1573     // Decide which of the overflow intrinsics we are lowering to:
1574     llvm::Intrinsic::ID IntrinsicId;
1575     switch (BuiltinID) {
1576     default: llvm_unreachable("Unknown security overflow builtin id.");
1577     case Builtin::BI__builtin_uadd_overflow:
1578     case Builtin::BI__builtin_uaddl_overflow:
1579     case Builtin::BI__builtin_uaddll_overflow:
1580       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1581       break;
1582     case Builtin::BI__builtin_usub_overflow:
1583     case Builtin::BI__builtin_usubl_overflow:
1584     case Builtin::BI__builtin_usubll_overflow:
1585       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1586       break;
1587     case Builtin::BI__builtin_umul_overflow:
1588     case Builtin::BI__builtin_umull_overflow:
1589     case Builtin::BI__builtin_umulll_overflow:
1590       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1591       break;
1592     case Builtin::BI__builtin_sadd_overflow:
1593     case Builtin::BI__builtin_saddl_overflow:
1594     case Builtin::BI__builtin_saddll_overflow:
1595       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1596       break;
1597     case Builtin::BI__builtin_ssub_overflow:
1598     case Builtin::BI__builtin_ssubl_overflow:
1599     case Builtin::BI__builtin_ssubll_overflow:
1600       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1601       break;
1602     case Builtin::BI__builtin_smul_overflow:
1603     case Builtin::BI__builtin_smull_overflow:
1604     case Builtin::BI__builtin_smulll_overflow:
1605       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1606       break;
1607     }
1608 
1609 
1610     llvm::Value *Carry;
1611     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1612     Builder.CreateStore(Sum, SumOutPtr);
1613 
1614     return RValue::get(Carry);
1615   }
1616   case Builtin::BI__builtin_addressof:
1617     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1618   case Builtin::BI__builtin_operator_new:
1619     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1620                                     E->getArg(0), false);
1621   case Builtin::BI__builtin_operator_delete:
1622     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1623                                     E->getArg(0), true);
1624   case Builtin::BI__noop:
1625     // __noop always evaluates to an integer literal zero.
1626     return RValue::get(ConstantInt::get(IntTy, 0));
1627   case Builtin::BI__builtin_call_with_static_chain: {
1628     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1629     const Expr *Chain = E->getArg(1);
1630     return EmitCall(Call->getCallee()->getType(),
1631                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1632                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1633   }
1634   case Builtin::BI_InterlockedExchange:
1635   case Builtin::BI_InterlockedExchangePointer:
1636     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1637   case Builtin::BI_InterlockedCompareExchangePointer: {
1638     llvm::Type *RTy;
1639     llvm::IntegerType *IntType =
1640       IntegerType::get(getLLVMContext(),
1641                        getContext().getTypeSize(E->getType()));
1642     llvm::Type *IntPtrType = IntType->getPointerTo();
1643 
1644     llvm::Value *Destination =
1645       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1646 
1647     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1648     RTy = Exchange->getType();
1649     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1650 
1651     llvm::Value *Comparand =
1652       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1653 
1654     auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1655                                               SequentiallyConsistent,
1656                                               SequentiallyConsistent);
1657     Result->setVolatile(true);
1658 
1659     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1660                                                                          0),
1661                                               RTy));
1662   }
1663   case Builtin::BI_InterlockedCompareExchange: {
1664     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1665         EmitScalarExpr(E->getArg(0)),
1666         EmitScalarExpr(E->getArg(2)),
1667         EmitScalarExpr(E->getArg(1)),
1668         SequentiallyConsistent,
1669         SequentiallyConsistent);
1670       CXI->setVolatile(true);
1671       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1672   }
1673   case Builtin::BI_InterlockedIncrement: {
1674     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1675       AtomicRMWInst::Add,
1676       EmitScalarExpr(E->getArg(0)),
1677       ConstantInt::get(Int32Ty, 1),
1678       llvm::SequentiallyConsistent);
1679     RMWI->setVolatile(true);
1680     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
1681   }
1682   case Builtin::BI_InterlockedDecrement: {
1683     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1684       AtomicRMWInst::Sub,
1685       EmitScalarExpr(E->getArg(0)),
1686       ConstantInt::get(Int32Ty, 1),
1687       llvm::SequentiallyConsistent);
1688     RMWI->setVolatile(true);
1689     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
1690   }
1691   case Builtin::BI_InterlockedExchangeAdd: {
1692     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1693       AtomicRMWInst::Add,
1694       EmitScalarExpr(E->getArg(0)),
1695       EmitScalarExpr(E->getArg(1)),
1696       llvm::SequentiallyConsistent);
1697     RMWI->setVolatile(true);
1698     return RValue::get(RMWI);
1699   }
1700   case Builtin::BI__readfsdword: {
1701     Value *IntToPtr =
1702       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1703                              llvm::PointerType::get(CGM.Int32Ty, 257));
1704     LoadInst *Load =
1705         Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
1706     return RValue::get(Load);
1707   }
1708 
1709   case Builtin::BI__exception_code:
1710   case Builtin::BI_exception_code:
1711     return RValue::get(EmitSEHExceptionCode());
1712   case Builtin::BI__exception_info:
1713   case Builtin::BI_exception_info:
1714     return RValue::get(EmitSEHExceptionInfo());
1715   case Builtin::BI__abnormal_termination:
1716   case Builtin::BI_abnormal_termination:
1717     return RValue::get(EmitSEHAbnormalTermination());
1718   case Builtin::BI_setjmpex: {
1719     if (getTarget().getTriple().isOSMSVCRT()) {
1720       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1721       llvm::AttributeSet ReturnsTwiceAttr =
1722           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1723                             llvm::Attribute::ReturnsTwice);
1724       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
1725           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1726           "_setjmpex", ReturnsTwiceAttr);
1727       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1728           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1729       llvm::Value *FrameAddr =
1730           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1731                              ConstantInt::get(Int32Ty, 0));
1732       llvm::Value *Args[] = {Buf, FrameAddr};
1733       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
1734       CS.setAttributes(ReturnsTwiceAttr);
1735       return RValue::get(CS.getInstruction());
1736     }
1737     break;
1738   }
1739   case Builtin::BI_setjmp: {
1740     if (getTarget().getTriple().isOSMSVCRT()) {
1741       llvm::AttributeSet ReturnsTwiceAttr =
1742           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1743                             llvm::Attribute::ReturnsTwice);
1744       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1745           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1746       llvm::CallSite CS;
1747       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
1748         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
1749         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
1750             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
1751             "_setjmp3", ReturnsTwiceAttr);
1752         llvm::Value *Count = ConstantInt::get(IntTy, 0);
1753         llvm::Value *Args[] = {Buf, Count};
1754         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
1755       } else {
1756         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1757         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
1758             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1759             "_setjmp", ReturnsTwiceAttr);
1760         llvm::Value *FrameAddr =
1761             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1762                                ConstantInt::get(Int32Ty, 0));
1763         llvm::Value *Args[] = {Buf, FrameAddr};
1764         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
1765       }
1766       CS.setAttributes(ReturnsTwiceAttr);
1767       return RValue::get(CS.getInstruction());
1768     }
1769     break;
1770   }
1771 
1772   case Builtin::BI__GetExceptionInfo: {
1773     if (llvm::GlobalVariable *GV =
1774             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
1775       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
1776     break;
1777   }
1778   }
1779 
1780   // If this is an alias for a lib function (e.g. __builtin_sin), emit
1781   // the call using the normal call path, but using the unmangled
1782   // version of the function name.
1783   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1784     return emitLibraryCall(*this, FD, E,
1785                            CGM.getBuiltinLibFunction(FD, BuiltinID));
1786 
1787   // If this is a predefined lib function (e.g. malloc), emit the call
1788   // using exactly the normal call path.
1789   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1790     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1791 
1792   // See if we have a target specific intrinsic.
1793   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
1794   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1795   if (const char *Prefix =
1796           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
1797     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1798     // NOTE we dont need to perform a compatibility flag check here since the
1799     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
1800     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
1801     if (IntrinsicID == Intrinsic::not_intrinsic)
1802       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
1803   }
1804 
1805   if (IntrinsicID != Intrinsic::not_intrinsic) {
1806     SmallVector<Value*, 16> Args;
1807 
1808     // Find out if any arguments are required to be integer constant
1809     // expressions.
1810     unsigned ICEArguments = 0;
1811     ASTContext::GetBuiltinTypeError Error;
1812     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1813     assert(Error == ASTContext::GE_None && "Should not codegen an error");
1814 
1815     Function *F = CGM.getIntrinsic(IntrinsicID);
1816     llvm::FunctionType *FTy = F->getFunctionType();
1817 
1818     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1819       Value *ArgValue;
1820       // If this is a normal argument, just emit it as a scalar.
1821       if ((ICEArguments & (1 << i)) == 0) {
1822         ArgValue = EmitScalarExpr(E->getArg(i));
1823       } else {
1824         // If this is required to be a constant, constant fold it so that we
1825         // know that the generated intrinsic gets a ConstantInt.
1826         llvm::APSInt Result;
1827         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1828         assert(IsConst && "Constant arg isn't actually constant?");
1829         (void)IsConst;
1830         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1831       }
1832 
1833       // If the intrinsic arg type is different from the builtin arg type
1834       // we need to do a bit cast.
1835       llvm::Type *PTy = FTy->getParamType(i);
1836       if (PTy != ArgValue->getType()) {
1837         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1838                "Must be able to losslessly bit cast to param");
1839         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1840       }
1841 
1842       Args.push_back(ArgValue);
1843     }
1844 
1845     Value *V = Builder.CreateCall(F, Args);
1846     QualType BuiltinRetType = E->getType();
1847 
1848     llvm::Type *RetTy = VoidTy;
1849     if (!BuiltinRetType->isVoidType())
1850       RetTy = ConvertType(BuiltinRetType);
1851 
1852     if (RetTy != V->getType()) {
1853       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1854              "Must be able to losslessly bit cast result type");
1855       V = Builder.CreateBitCast(V, RetTy);
1856     }
1857 
1858     return RValue::get(V);
1859   }
1860 
1861   // See if we have a target specific builtin that needs to be lowered.
1862   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1863     return RValue::get(V);
1864 
1865   ErrorUnsupported(E, "builtin function");
1866 
1867   // Unknown builtin, for now just dump it out and return undef.
1868   return GetUndefRValue(E->getType());
1869 }
1870 
1871 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1872                                               const CallExpr *E) {
1873   switch (getTarget().getTriple().getArch()) {
1874   case llvm::Triple::arm:
1875   case llvm::Triple::armeb:
1876   case llvm::Triple::thumb:
1877   case llvm::Triple::thumbeb:
1878     return EmitARMBuiltinExpr(BuiltinID, E);
1879   case llvm::Triple::aarch64:
1880   case llvm::Triple::aarch64_be:
1881     return EmitAArch64BuiltinExpr(BuiltinID, E);
1882   case llvm::Triple::x86:
1883   case llvm::Triple::x86_64:
1884     return EmitX86BuiltinExpr(BuiltinID, E);
1885   case llvm::Triple::ppc:
1886   case llvm::Triple::ppc64:
1887   case llvm::Triple::ppc64le:
1888     return EmitPPCBuiltinExpr(BuiltinID, E);
1889   case llvm::Triple::r600:
1890   case llvm::Triple::amdgcn:
1891     return EmitAMDGPUBuiltinExpr(BuiltinID, E);
1892   case llvm::Triple::systemz:
1893     return EmitSystemZBuiltinExpr(BuiltinID, E);
1894   case llvm::Triple::nvptx:
1895   case llvm::Triple::nvptx64:
1896     return EmitNVPTXBuiltinExpr(BuiltinID, E);
1897   case llvm::Triple::wasm32:
1898   case llvm::Triple::wasm64:
1899     return EmitWebAssemblyBuiltinExpr(BuiltinID, E);
1900   default:
1901     return nullptr;
1902   }
1903 }
1904 
1905 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1906                                      NeonTypeFlags TypeFlags,
1907                                      bool V1Ty=false) {
1908   int IsQuad = TypeFlags.isQuad();
1909   switch (TypeFlags.getEltType()) {
1910   case NeonTypeFlags::Int8:
1911   case NeonTypeFlags::Poly8:
1912     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
1913   case NeonTypeFlags::Int16:
1914   case NeonTypeFlags::Poly16:
1915   case NeonTypeFlags::Float16:
1916     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
1917   case NeonTypeFlags::Int32:
1918     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
1919   case NeonTypeFlags::Int64:
1920   case NeonTypeFlags::Poly64:
1921     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
1922   case NeonTypeFlags::Poly128:
1923     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
1924     // There is a lot of i128 and f128 API missing.
1925     // so we use v16i8 to represent poly128 and get pattern matched.
1926     return llvm::VectorType::get(CGF->Int8Ty, 16);
1927   case NeonTypeFlags::Float32:
1928     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
1929   case NeonTypeFlags::Float64:
1930     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
1931   }
1932   llvm_unreachable("Unknown vector element type!");
1933 }
1934 
1935 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
1936                                           NeonTypeFlags IntTypeFlags) {
1937   int IsQuad = IntTypeFlags.isQuad();
1938   switch (IntTypeFlags.getEltType()) {
1939   case NeonTypeFlags::Int32:
1940     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
1941   case NeonTypeFlags::Int64:
1942     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
1943   default:
1944     llvm_unreachable("Type can't be converted to floating-point!");
1945   }
1946 }
1947 
1948 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1949   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1950   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1951   return Builder.CreateShuffleVector(V, V, SV, "lane");
1952 }
1953 
1954 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1955                                      const char *name,
1956                                      unsigned shift, bool rightshift) {
1957   unsigned j = 0;
1958   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1959        ai != ae; ++ai, ++j)
1960     if (shift > 0 && shift == j)
1961       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1962     else
1963       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1964 
1965   return Builder.CreateCall(F, Ops, name);
1966 }
1967 
1968 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1969                                             bool neg) {
1970   int SV = cast<ConstantInt>(V)->getSExtValue();
1971   return ConstantInt::get(Ty, neg ? -SV : SV);
1972 }
1973 
1974 // \brief Right-shift a vector by a constant.
1975 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
1976                                           llvm::Type *Ty, bool usgn,
1977                                           const char *name) {
1978   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1979 
1980   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
1981   int EltSize = VTy->getScalarSizeInBits();
1982 
1983   Vec = Builder.CreateBitCast(Vec, Ty);
1984 
1985   // lshr/ashr are undefined when the shift amount is equal to the vector
1986   // element size.
1987   if (ShiftAmt == EltSize) {
1988     if (usgn) {
1989       // Right-shifting an unsigned value by its size yields 0.
1990       return llvm::ConstantAggregateZero::get(VTy);
1991     } else {
1992       // Right-shifting a signed value by its size is equivalent
1993       // to a shift of size-1.
1994       --ShiftAmt;
1995       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
1996     }
1997   }
1998 
1999   Shift = EmitNeonShiftVector(Shift, Ty, false);
2000   if (usgn)
2001     return Builder.CreateLShr(Vec, Shift, name);
2002   else
2003     return Builder.CreateAShr(Vec, Shift, name);
2004 }
2005 
/// Bit flags that are OR-ed together to form the TypeModifier field of an
/// intrinsic table entry. Each basic flag occupies its own bit; the named
/// combinations at the end are shorthands for frequently used sets. How each
/// flag is interpreted is up to the code that consumes the tables.
enum {
  // Basic flags, one bit each.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Named combinations of the flags above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 = VectorRet | Add2ArgTypes | VectorizeArgTypes,
  FpCmpzModifiers = VectorRet | Add1ArgType | InventFloatType
};
2027 
2028  struct NeonIntrinsicInfo {
2029   unsigned BuiltinID;
2030   unsigned LLVMIntrinsic;
2031   unsigned AltLLVMIntrinsic;
2032   const char *NameHint;
2033   unsigned TypeModifier;
2034 
2035   bool operator<(unsigned RHSBuiltinID) const {
2036     return BuiltinID < RHSBuiltinID;
2037   }
2038 };
2039 
// Helpers for writing NeonIntrinsicInfo table rows. Each macro expands to a
// brace-enclosed initializer whose fields are, in order: BuiltinID,
// LLVMIntrinsic, AltLLVMIntrinsic, NameHint, TypeModifier. The builtin ID is
// formed by pasting NameBase onto NEON::BI__builtin_neon_, and the name hint
// is the stringified NameBase.

// NEONMAP0: a row with no intrinsic, no alternative intrinsic and no type
// modifier (all three are 0).
#define NEONMAP0(NameBase) \
  { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 }

// NEONMAP1: a row with a single intrinsic (Intrinsic::LLVMIntrinsic) and a
// type modifier; the alternative intrinsic is 0.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier }

// NEONMAP2: a row with both a primary and an alternative intrinsic, plus a
// type modifier.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
  { NEON:: BI__builtin_neon_ ## NameBase, \
      Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
      #NameBase, TypeModifier }
2051 
2052 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2053   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2054   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2055   NEONMAP1(vabs_v, arm_neon_vabs, 0),
2056   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2057   NEONMAP0(vaddhn_v),
2058   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2059   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2060   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2061   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2062   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2063   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2064   NEONMAP1(vcage_v, arm_neon_vacge, 0),
2065   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2066   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2067   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2068   NEONMAP1(vcale_v, arm_neon_vacge, 0),
2069   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2070   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2071   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2072   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2073   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2074   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2075   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2076   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2077   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2078   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2079   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2080   NEONMAP0(vcvt_f32_v),
2081   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2082   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2083   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2084   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2085   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2086   NEONMAP0(vcvt_s32_v),
2087   NEONMAP0(vcvt_s64_v),
2088   NEONMAP0(vcvt_u32_v),
2089   NEONMAP0(vcvt_u64_v),
2090   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2091   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2092   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2093   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2094   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2095   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2096   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2097   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2098   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2099   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2100   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2101   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2102   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2103   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2104   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2105   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2106   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2107   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2108   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2109   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2110   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2111   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2112   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2113   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2114   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2115   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2116   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2117   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2118   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2119   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2120   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2121   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2122   NEONMAP0(vcvtq_f32_v),
2123   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2124   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2125   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2126   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2127   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2128   NEONMAP0(vcvtq_s32_v),
2129   NEONMAP0(vcvtq_s64_v),
2130   NEONMAP0(vcvtq_u32_v),
2131   NEONMAP0(vcvtq_u64_v),
2132   NEONMAP0(vext_v),
2133   NEONMAP0(vextq_v),
2134   NEONMAP0(vfma_v),
2135   NEONMAP0(vfmaq_v),
2136   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2137   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2138   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2139   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2140   NEONMAP0(vld1_dup_v),
2141   NEONMAP1(vld1_v, arm_neon_vld1, 0),
2142   NEONMAP0(vld1q_dup_v),
2143   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2144   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2145   NEONMAP1(vld2_v, arm_neon_vld2, 0),
2146   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2147   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2148   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2149   NEONMAP1(vld3_v, arm_neon_vld3, 0),
2150   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2151   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2152   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2153   NEONMAP1(vld4_v, arm_neon_vld4, 0),
2154   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2155   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2156   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2157   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2158   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2159   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2160   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2161   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2162   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2163   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2164   NEONMAP0(vmovl_v),
2165   NEONMAP0(vmovn_v),
2166   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2167   NEONMAP0(vmull_v),
2168   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2169   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2170   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2171   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2172   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2173   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2174   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2175   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2176   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2177   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2178   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2179   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2180   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2181   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2182   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2183   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2184   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2185   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2186   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2187   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2188   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2189   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2190   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2191   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2192   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2193   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2194   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2195   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2196   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2197   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2198   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2199   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2200   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2201   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2202   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2203   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2204   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2205   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2206   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2207   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2208   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2209   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2210   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2211   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2212   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2213   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2214   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2215   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2216   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2217   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2218   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2219   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2220   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2221   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2222   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2223   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2224   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2225   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2226   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2227   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2228   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2229   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2230   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2231   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2232   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2233   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2234   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2235   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2236   NEONMAP0(vshl_n_v),
2237   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2238   NEONMAP0(vshll_n_v),
2239   NEONMAP0(vshlq_n_v),
2240   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2241   NEONMAP0(vshr_n_v),
2242   NEONMAP0(vshrn_n_v),
2243   NEONMAP0(vshrq_n_v),
2244   NEONMAP1(vst1_v, arm_neon_vst1, 0),
2245   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2246   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2247   NEONMAP1(vst2_v, arm_neon_vst2, 0),
2248   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2249   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2250   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2251   NEONMAP1(vst3_v, arm_neon_vst3, 0),
2252   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2253   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2254   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2255   NEONMAP1(vst4_v, arm_neon_vst4, 0),
2256   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2257   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2258   NEONMAP0(vsubhn_v),
2259   NEONMAP0(vtrn_v),
2260   NEONMAP0(vtrnq_v),
2261   NEONMAP0(vtst_v),
2262   NEONMAP0(vtstq_v),
2263   NEONMAP0(vuzp_v),
2264   NEONMAP0(vuzpq_v),
2265   NEONMAP0(vzip_v),
2266   NEONMAP0(vzipq_v)
2267 };
2268 
// Table of AArch64 SIMD NEON builtins, one row per builtin. A NEONMAP0 row
// names only the builtin, a NEONMAP1 row adds one intrinsic name and a
// type-modifier expression, and a NEONMAP2 row adds two intrinsic names before
// the modifier. In the NEONMAP2 rows flagged UnsignedAlts the first intrinsic
// is the u-prefixed form and the second the s-prefixed form.
//
// Row order matters: findNeonIntrinsicInMap() binary-searches a map by
// BuiltinID and, in builds with assertions, asserts that the map is sorted.
// The rows below are written in alphabetical order of builtin name.
// NOTE(review): presumably that matches ascending BuiltinID order, and this
// table is the one paired with AArch64SIMDIntrinsicsProvenSorted -- confirm at
// the lookup call site before inserting or moving rows.
2269 static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2270   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2271   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2272   NEONMAP0(vaddhn_v),
2273   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2274   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2275   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2276   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2277   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2278   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2279   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2280   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  // The vcale*/vcalt* rows name the same facge/facgt intrinsics as the
  // vcage*/vcagt* rows above.
  // NOTE(review): presumably the operands are swapped where these builtins are
  // emitted -- confirm.
2281   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2282   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2283   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2284   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2285   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2286   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2287   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2288   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2289   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2290   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2291   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2292   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2293   NEONMAP0(vcvt_f32_v),
2294   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2295   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2296   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2297   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2298   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2299   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2300   NEONMAP0(vcvtq_f32_v),
2301   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2302   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2303   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2304   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2305   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2306   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2307   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2308   NEONMAP0(vext_v),
2309   NEONMAP0(vextq_v),
2310   NEONMAP0(vfma_v),
2311   NEONMAP0(vfmaq_v),
2312   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2313   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2314   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2315   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2316   NEONMAP0(vmovl_v),
2317   NEONMAP0(vmovn_v),
2318   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2319   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2320   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2321   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2322   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2323   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2324   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2325   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2326   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2327   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2328   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2329   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2330   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2331   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2332   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2333   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2334   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2335   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2336   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2337   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2338   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2339   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2340   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2341   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2342   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2343   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
2344   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2345   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2346   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2347   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2348   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2349   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2350   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2351   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2352   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2353   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2354   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2355   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2356   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2357   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2358   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2359   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2360   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2361   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2362   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2363   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2364   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2365   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2366   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2367   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2368   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2369   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2370   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2371   NEONMAP0(vshl_n_v),
2372   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2373   NEONMAP0(vshll_n_v),
2374   NEONMAP0(vshlq_n_v),
2375   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2376   NEONMAP0(vshr_n_v),
2377   NEONMAP0(vshrn_n_v),
2378   NEONMAP0(vshrq_n_v),
2379   NEONMAP0(vsubhn_v),
2380   NEONMAP0(vtst_v),
2381   NEONMAP0(vtstq_v),
2382 };
2383 
// Table of AArch64 SISD NEON builtins, one row per builtin: the builtin name,
// the intrinsic to use for it, and a type-modifier mask.
// EmitCommonNeonSISDBuiltinExpr() reads BuiltinID, LLVMIntrinsic, TypeModifier
// and NameHint from a row and asserts that the intrinsic is non-zero.
//
// The modifier mask is interpreted by LookupNeonLLVMIntrinsic(): AddRetType and
// Add1ArgType append the call's return type / the argument type to the
// intrinsic's overload list, and Use64BitVectors / Use128BitVectors select the
// vector width used when one of those types is vectorized.
// NOTE(review): Vectorize1ArgType and VectorRet look like combinations of
// those flags with the vectorize bits; their definitions are not next to this
// table -- confirm.
//
// Row order matters: findNeonIntrinsicInMap() binary-searches a map by
// BuiltinID and, in builds with assertions, asserts that the map is sorted.
// The rows below are written in alphabetical order of builtin name.
// NOTE(review): presumably that matches ascending BuiltinID order, and this
// table is the one paired with AArch64SISDIntrinsicsProvenSorted -- confirm at
// the lookup call site before inserting or moving rows.
2384 static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
2385   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
2386   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
2387   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
2388   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2389   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2390   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2391   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2392   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2393   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2394   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2395   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2396   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
2397   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2398   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
2399   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2400   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2401   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2402   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2403   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2404   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  // The vcale*/vcalt* rows reuse facge/facgt; EmitCommonNeonSISDBuiltinExpr()
  // swaps the two operands for these builtins before emitting the call.
2405   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2406   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2407   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2408   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
2409   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2410   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2411   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2412   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2413   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2414   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2415   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2416   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2417   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2418   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2419   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2420   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2421   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2422   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2423   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2424   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2425   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2426   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2427   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2428   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2429   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2430   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2431   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2432   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2433   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
2434   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2435   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2436   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2437   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2438   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2439   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2440   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2441   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2442   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2443   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2444   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2445   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2446   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2447   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2448   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2449   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2450   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2451   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2452   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2453   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2454   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
2455   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
2456   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  // NOTE(review): the signed vpaddd_s64 row names uaddv, the same intrinsic as
  // the unsigned row; presumably deliberate -- confirm.
2457   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2458   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2459   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2460   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2461   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2462   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2463   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2464   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2465   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2466   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2467   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2468   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
2469   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2470   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
2471   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2472   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2473   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
2474   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
2475   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2476   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2477   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
2478   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
2479   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
2480   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
2481   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
2482   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
2483   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
2484   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
2485   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2486   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2487   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2488   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2489   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
2490   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2491   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2492   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2493   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
2494   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2495   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
2496   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
2497   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
2498   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2499   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2500   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
2501   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
2502   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2503   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2504   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
2505   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
2506   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
2507   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
2508   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2509   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2510   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2511   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2512   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
2513   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2514   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2515   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2516   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2517   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2518   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2519   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
2520   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
2521   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2522   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2523   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2524   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2525   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
2526   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
2527   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
2528   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
2529   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2530   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2531   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
2532   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
2533   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
2534   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2535   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2536   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2537   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2538   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
2539   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2540   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2541   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2542   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2543   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
2544   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
2545   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2546   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2547   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
2548   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
2549   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
2550   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
2551   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
2552   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
2553   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
2554   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
2555   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
2556   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
2557   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
2558   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
2559   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
2560   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
2561   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
2562   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
2563   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
2564   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
2565   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
2566   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
2567   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2568   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
2569   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2570   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
2571   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
2572   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
2573   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2574   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
2575   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2576   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
2577 };
2578 
// The NEONMAPn helper macros are used to spell the rows of the intrinsic
// tables above; undefine them here so they are not visible to the rest of the
// file.
2579 #undef NEONMAP0
2580 #undef NEONMAP1
2581 #undef NEONMAP2
2582 
// "Already checked" flag handed to findNeonIntrinsicInMap() as MapProvenSorted:
// in builds with assertions that function verifies the map's ordering while
// the flag is false and then sets it to true, so the scan runs only once.
// NOTE(review): presumably paired with the ARM NEON SIMD table -- confirm at
// the call site.
2583 static bool NEONSIMDIntrinsicsProvenSorted = false;
2584 
// "Already checked" flags handed to findNeonIntrinsicInMap() as
// MapProvenSorted: in builds with assertions that function verifies a map's
// ordering while its flag is false and then sets the flag to true.
// NOTE(review): presumably one flag each for AArch64SIMDIntrinsicMap and
// AArch64SISDIntrinsicMap -- confirm at the call sites.
2585 static bool AArch64SIMDIntrinsicsProvenSorted = false;
2586 static bool AArch64SISDIntrinsicsProvenSorted = false;
2587 
2588 
2589 static const NeonIntrinsicInfo *
2590 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
2591                        unsigned BuiltinID, bool &MapProvenSorted) {
2592 
2593 #ifndef NDEBUG
2594   if (!MapProvenSorted) {
2595     // FIXME: use std::is_sorted once C++11 is allowed
2596     for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
2597       assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
2598     MapProvenSorted = true;
2599   }
2600 #endif
2601 
2602   const NeonIntrinsicInfo *Builtin =
2603       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
2604 
2605   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
2606     return Builtin;
2607 
2608   return nullptr;
2609 }
2610 
2611 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
2612                                                    unsigned Modifier,
2613                                                    llvm::Type *ArgType,
2614                                                    const CallExpr *E) {
2615   int VectorSize = 0;
2616   if (Modifier & Use64BitVectors)
2617     VectorSize = 64;
2618   else if (Modifier & Use128BitVectors)
2619     VectorSize = 128;
2620 
2621   // Return type.
2622   SmallVector<llvm::Type *, 3> Tys;
2623   if (Modifier & AddRetType) {
2624     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
2625     if (Modifier & VectorizeRetType)
2626       Ty = llvm::VectorType::get(
2627           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
2628 
2629     Tys.push_back(Ty);
2630   }
2631 
2632   // Arguments.
2633   if (Modifier & VectorizeArgTypes) {
2634     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
2635     ArgType = llvm::VectorType::get(ArgType, Elts);
2636   }
2637 
2638   if (Modifier & (Add1ArgType | Add2ArgTypes))
2639     Tys.push_back(ArgType);
2640 
2641   if (Modifier & Add2ArgTypes)
2642     Tys.push_back(ArgType);
2643 
2644   if (Modifier & InventFloatType)
2645     Tys.push_back(FloatTy);
2646 
2647   return CGM.getIntrinsic(IntrinsicID, Tys);
2648 }
2649 
2650 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
2651                                             const NeonIntrinsicInfo &SISDInfo,
2652                                             SmallVectorImpl<Value *> &Ops,
2653                                             const CallExpr *E) {
2654   unsigned BuiltinID = SISDInfo.BuiltinID;
2655   unsigned int Int = SISDInfo.LLVMIntrinsic;
2656   unsigned Modifier = SISDInfo.TypeModifier;
2657   const char *s = SISDInfo.NameHint;
2658 
2659   switch (BuiltinID) {
2660   case NEON::BI__builtin_neon_vcled_s64:
2661   case NEON::BI__builtin_neon_vcled_u64:
2662   case NEON::BI__builtin_neon_vcles_f32:
2663   case NEON::BI__builtin_neon_vcled_f64:
2664   case NEON::BI__builtin_neon_vcltd_s64:
2665   case NEON::BI__builtin_neon_vcltd_u64:
2666   case NEON::BI__builtin_neon_vclts_f32:
2667   case NEON::BI__builtin_neon_vcltd_f64:
2668   case NEON::BI__builtin_neon_vcales_f32:
2669   case NEON::BI__builtin_neon_vcaled_f64:
2670   case NEON::BI__builtin_neon_vcalts_f32:
2671   case NEON::BI__builtin_neon_vcaltd_f64:
2672     // Only one direction of comparisons actually exist, cmle is actually a cmge
2673     // with swapped operands. The table gives us the right intrinsic but we
2674     // still need to do the swap.
2675     std::swap(Ops[0], Ops[1]);
2676     break;
2677   }
2678 
2679   assert(Int && "Generic code assumes a valid intrinsic");
2680 
2681   // Determine the type(s) of this overloaded AArch64 intrinsic.
2682   const Expr *Arg = E->getArg(0);
2683   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
2684   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
2685 
2686   int j = 0;
2687   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
2688   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2689        ai != ae; ++ai, ++j) {
2690     llvm::Type *ArgTy = ai->getType();
2691     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
2692              ArgTy->getPrimitiveSizeInBits())
2693       continue;
2694 
2695     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
2696     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
2697     // it before inserting.
2698     Ops[j] =
2699         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
2700     Ops[j] =
2701         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
2702   }
2703 
2704   Value *Result = CGF.EmitNeonCall(F, Ops, s);
2705   llvm::Type *ResultType = CGF.ConvertType(E->getType());
2706   if (ResultType->getPrimitiveSizeInBits() <
2707       Result->getType()->getPrimitiveSizeInBits())
2708     return CGF.Builder.CreateExtractElement(Result, C0);
2709 
2710   return CGF.Builder.CreateBitCast(Result, ResultType, s);
2711 }
2712 
/// Emit IR for the overloaded NEON builtin \p BuiltinID.
///
/// The last argument of \p E must be an integer constant expression; it is
/// decoded as NeonTypeFlags and selects the vector type the builtin operates
/// on. Builtins with a dedicated case in the switch below are emitted there
/// (either as plain IR or as a call to a specific intrinsic). Every other
/// builtin, and the cases that `break` out of the switch, is emitted as a
/// call to the intrinsic returned by LookupNeonLLVMIntrinsic and the result
/// is bitcast to the converted type of \p E.
///
/// \param BuiltinID        the builtin being emitted.
/// \param LLVMIntrinsic    primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic alternate intrinsic ID; which one is used depends
///                         on the case (see UnsignedAlts handling below).
/// \param NameHint         name given to the emitted instruction(s).
/// \param Modifier         flag bits forwarded to LookupNeonLLVMIntrinsic;
///                         UnsignedAlts is also inspected here.
/// \param E                the call expression for the builtin.
/// \param Ops              already-emitted operands; modified in place.
/// \param PtrOp0, PtrOp1   addresses whose alignment is passed to the load
///                         and store intrinsics.
/// \returns the emitted value, or nullptr if the type argument is not an
///          integer constant expression or GetNeonType yields no type.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materialize an address's alignment as an i32 constant, the form in which
  // it is appended to the load/store intrinsic operands below.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // With UnsignedAlts set, LLVMIntrinsic is used for unsigned types and
  // AltLLVMIntrinsic for all other types.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic fabs intrinsic.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "le"/"lt" forms are emitted as the "ge"/"gt" forms with the
    // operands swapped.
    std::swap(Ops[0], Ops[1]);
    // Intentional fall through.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result type and on a floating-point
    // vector type with the same lane width (32 or 64 bits) and lane count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer to float conversion, emitted as plain uitofp/sitofp.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Overloaded on { float result type, integer source type }. Unsigned
    // sources use LLVMIntrinsic, signed sources AltLLVMIntrinsic.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    // Overloaded on { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float to integer conversion, emitted as plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // Overloaded on { integer result type, float source type }.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle of the two inputs selecting the consecutive
    // elements CV, CV+1, ..., CV+N-1, where CV is the constant in Ops[2].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    // The alignment of PtrOp0 is appended as the final intrinsic operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the intrinsic on Ops[1] (with PtrOp1's alignment), then store its
    // result through Ops[0] cast to a pointer to the result type.
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load a single element through PtrOp0, insert it into lane 0 of an
    // undef vector, and splat that lane.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    // Cast every operand from index 2 up to, but not including, the last one
    // to the vector type.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination of the store below; everything after it is
    // passed to the intrinsic.
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen each element: zext for unsigned types, sext otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow each element with a plain trunc.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    // Polynomial types override the signed/unsigned choice.
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: LLVMIntrinsic on the operands after the first, then
    // AltLLVMIntrinsic on the original first operand and that result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use LLVMIntrinsic, all others AltLLVMIntrinsic.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Shift left by an immediate, emitted as a plain shl.
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Widen (zext for unsigned, sext otherwise), then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the wide type (lshr for unsigned, ashr otherwise), then
    // truncate to the narrow result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v:
    // The alignment of PtrOp0 is appended as the final intrinsic operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] points at two consecutive result vectors; Ops[1] and Ops[2] are
    // the inputs. Result vi is a shuffle taking, for every even i, element
    // i+vi of the first input followed by element i+vi of the second.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0, sign-extended so that true lanes become all-ones.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same result layout as vtrn above. Result vi selects elements
    // vi, vi+2, vi+4, ... of the concatenated inputs.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same result layout as vtrn above. Result 0 interleaves the low halves
    // of the two inputs, result 1 the high halves.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by builtins without a dedicated case and by the
  // cases above that `break` (e.g. vclz after appending its extra operand).
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
3136 
3137 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3138     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3139     const CmpInst::Predicate Ip, const Twine &Name) {
3140   llvm::Type *OTy = Op->getType();
3141 
3142   // FIXME: this is utterly horrific. We should not be looking at previous
3143   // codegen context to find out what needs doing. Unfortunately TableGen
3144   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3145   // (etc).
3146   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3147     OTy = BI->getOperand(0)->getType();
3148 
3149   Op = Builder.CreateBitCast(Op, OTy);
3150   if (OTy->getScalarType()->isFloatingPointTy()) {
3151     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3152   } else {
3153     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3154   }
3155   return Builder.CreateSExt(Op, Ty, Name);
3156 }
3157 
3158 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3159                                  Value *ExtOp, Value *IndexOp,
3160                                  llvm::Type *ResTy, unsigned IntID,
3161                                  const char *Name) {
3162   SmallVector<Value *, 2> TblOps;
3163   if (ExtOp)
3164     TblOps.push_back(ExtOp);
3165 
3166   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3167   SmallVector<Constant*, 16> Indices;
3168   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3169   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3170     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
3171     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
3172   }
3173   Value *SV = llvm::ConstantVector::get(Indices);
3174 
3175   int PairPos = 0, End = Ops.size() - 1;
3176   while (PairPos < End) {
3177     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3178                                                      Ops[PairPos+1], SV, Name));
3179     PairPos += 2;
3180   }
3181 
3182   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3183   // of the 128-bit lookup table with zero.
3184   if (PairPos == End) {
3185     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3186     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3187                                                      ZeroTbl, SV, Name));
3188   }
3189 
3190   Function *TblF;
3191   TblOps.push_back(IndexOp);
3192   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3193 
3194   return CGF.EmitNeonCall(TblF, TblOps, Name);
3195 }
3196 
3197 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3198   unsigned Value;
3199   switch (BuiltinID) {
3200   default:
3201     return nullptr;
3202   case ARM::BI__builtin_arm_nop:
3203     Value = 0;
3204     break;
3205   case ARM::BI__builtin_arm_yield:
3206   case ARM::BI__yield:
3207     Value = 1;
3208     break;
3209   case ARM::BI__builtin_arm_wfe:
3210   case ARM::BI__wfe:
3211     Value = 2;
3212     break;
3213   case ARM::BI__builtin_arm_wfi:
3214   case ARM::BI__wfi:
3215     Value = 3;
3216     break;
3217   case ARM::BI__builtin_arm_sev:
3218   case ARM::BI__sev:
3219     Value = 4;
3220     break;
3221   case ARM::BI__builtin_arm_sevl:
3222   case ARM::BI__sevl:
3223     Value = 5;
3224     break;
3225   }
3226 
3227   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3228                             llvm::ConstantInt::get(Int32Ty, Value));
3229 }
3230 
3231 // Generates the IR for the read/write special register builtin,
3232 // ValueType is the type of the value that is to be written or read,
3233 // RegisterType is the type of the register being written to or read from.
3234 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3235                                          const CallExpr *E,
3236                                          llvm::Type *RegisterType,
3237                                          llvm::Type *ValueType, bool IsRead) {
3238   // write and register intrinsics only support 32 and 64 bit operations.
3239   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3240           && "Unsupported size for register.");
3241 
3242   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3243   CodeGen::CodeGenModule &CGM = CGF.CGM;
3244   LLVMContext &Context = CGM.getLLVMContext();
3245 
3246   const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3247   StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3248 
3249   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3250   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3251   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3252 
3253   llvm::Type *Types[] = { RegisterType };
3254 
3255   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3256   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3257             && "Can't fit 64-bit value in 32-bit register");
3258 
3259   if (IsRead) {
3260     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3261     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3262 
3263     if (MixedTypes)
3264       // Read into 64 bit register and then truncate result to 32 bit.
3265       return Builder.CreateTrunc(Call, ValueType);
3266 
3267     if (ValueType->isPointerTy())
3268       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3269       return Builder.CreateIntToPtr(Call, ValueType);
3270 
3271     return Call;
3272   }
3273 
3274   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3275   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3276   if (MixedTypes) {
3277     // Extend 32 bit write value to 64 bit to pass to write.
3278     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3279     return Builder.CreateCall(F, { Metadata, ArgValue });
3280   }
3281 
3282   if (ValueType->isPointerTy()) {
3283     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3284     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3285     return Builder.CreateCall(F, { Metadata, ArgValue });
3286   }
3287 
3288   return Builder.CreateCall(F, { Metadata, ArgValue });
3289 }
3290 
3291 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3292 /// argument that specifies the vector type.
3293 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3294   switch (BuiltinID) {
3295   default: break;
3296   case NEON::BI__builtin_neon_vget_lane_i8:
3297   case NEON::BI__builtin_neon_vget_lane_i16:
3298   case NEON::BI__builtin_neon_vget_lane_i32:
3299   case NEON::BI__builtin_neon_vget_lane_i64:
3300   case NEON::BI__builtin_neon_vget_lane_f32:
3301   case NEON::BI__builtin_neon_vgetq_lane_i8:
3302   case NEON::BI__builtin_neon_vgetq_lane_i16:
3303   case NEON::BI__builtin_neon_vgetq_lane_i32:
3304   case NEON::BI__builtin_neon_vgetq_lane_i64:
3305   case NEON::BI__builtin_neon_vgetq_lane_f32:
3306   case NEON::BI__builtin_neon_vset_lane_i8:
3307   case NEON::BI__builtin_neon_vset_lane_i16:
3308   case NEON::BI__builtin_neon_vset_lane_i32:
3309   case NEON::BI__builtin_neon_vset_lane_i64:
3310   case NEON::BI__builtin_neon_vset_lane_f32:
3311   case NEON::BI__builtin_neon_vsetq_lane_i8:
3312   case NEON::BI__builtin_neon_vsetq_lane_i16:
3313   case NEON::BI__builtin_neon_vsetq_lane_i32:
3314   case NEON::BI__builtin_neon_vsetq_lane_i64:
3315   case NEON::BI__builtin_neon_vsetq_lane_f32:
3316   case NEON::BI__builtin_neon_vsha1h_u32:
3317   case NEON::BI__builtin_neon_vsha1cq_u32:
3318   case NEON::BI__builtin_neon_vsha1pq_u32:
3319   case NEON::BI__builtin_neon_vsha1mq_u32:
3320   case ARM::BI_MoveToCoprocessor:
3321   case ARM::BI_MoveToCoprocessor2:
3322     return false;
3323   }
3324   return true;
3325 }
3326 
/// Emit LLVM IR for a call to a 32-bit ARM target builtin.
///
/// The non-NEON builtins (hints, __emit, dbg, prefetch, rbit, __clear_cache,
/// exclusive loads/stores, clrex, CRC32 and special-register access) are
/// handled first; each of them returns as soon as its call has been emitted.
/// Everything else is treated as a NEON builtin: the call arguments are
/// evaluated into Ops, and the code is then produced by a dedicated case, by
/// the shared ARMSIMDIntrinsicMap path, or by the final switch.
///
/// \param BuiltinID  the ARM:: or NEON:: builtin identifier being called.
/// \param E          the call expression whose arguments are emitted.
/// \returns the value emitted for the call, or nullptr when the builtin is
///          not handled here (the trailing type argument is not an integer
///          constant expression, no NEON type can be derived from it, or no
///          case of the final switch matches).
3327 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3328                                            const CallExpr *E) {
       // Hint builtins: a non-null result from GetValueForARMHint is the whole
       // answer.
3329   if (auto Hint = GetValueForARMHint(BuiltinID))
3330     return Hint;
3331 
       // __emit: place the constant operand in the instruction stream through
       // side-effecting inline asm -- ".inst.n" with a 16-bit value on Thumb,
       // ".inst" with a 32-bit value otherwise.
3332   if (BuiltinID == ARM::BI__emit) {
3333     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3334     llvm::FunctionType *FTy =
3335         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3336 
3337     APSInt Value;
3338     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3339       llvm_unreachable("Sema will ensure that the parameter is constant");
3340 
3341     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3342 
3343     llvm::InlineAsm *Emit =
3344         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3345                                  /*SideEffects=*/true)
3346                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3347                                  /*SideEffects=*/true);
3348 
3349     return Builder.CreateCall(Emit);
3350   }
3351 
3352   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3353     Value *Option = EmitScalarExpr(E->getArg(0));
3354     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3355   }
3356 
3357   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3358     Value *Address = EmitScalarExpr(E->getArg(0));
3359     Value *RW      = EmitScalarExpr(E->getArg(1));
3360     Value *IsData  = EmitScalarExpr(E->getArg(2));
3361 
3362     // Locality is not supported on ARM target
3363     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3364 
3365     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3366     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3367   }
3368 
3369   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3370     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3371                                                EmitScalarExpr(E->getArg(0)),
3372                               "rbit");
3373   }
3374 
       // __clear_cache is emitted as a nounwind call to a runtime function that
       // has the direct callee's own name and function type.
3375   if (BuiltinID == ARM::BI__clear_cache) {
3376     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
3377     const FunctionDecl *FD = E->getDirectCallee();
3378     Value *Ops[2];
3379     for (unsigned i = 0; i < 2; i++)
3380       Ops[i] = EmitScalarExpr(E->getArg(i));
3381     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
3382     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
3383     StringRef Name = FD->getName();
3384     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
3385   }
3386 
       // 64-bit exclusive loads: ldrexd/__ldrexd, or ldrex/ldaex whose result
       // type is 64 bits wide.  These use the *d intrinsics and rebuild a single
       // i64 from the two extracted result elements.
3387   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
3388       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
3389         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
3390        getContext().getTypeSize(E->getType()) == 64) ||
3391       BuiltinID == ARM::BI__ldrexd) {
3392     Function *F;
3393 
3394     switch (BuiltinID) {
3395     default: llvm_unreachable("unexpected builtin");
3396     case ARM::BI__builtin_arm_ldaex:
3397       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
3398       break;
3399     case ARM::BI__builtin_arm_ldrexd:
3400     case ARM::BI__builtin_arm_ldrex:
3401     case ARM::BI__ldrexd:
3402       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
3403       break;
3404     }
3405 
3406     Value *LdPtr = EmitScalarExpr(E->getArg(0));
3407     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
3408                                     "ldrexd");
3409 
         // Element 1 supplies the high 32 bits and element 0 the low 32 bits of
         // the combined value.
3410     Value *Val0 = Builder.CreateExtractValue(Val, 1);
3411     Value *Val1 = Builder.CreateExtractValue(Val, 0);
3412     Val0 = Builder.CreateZExt(Val0, Int64Ty);
3413     Val1 = Builder.CreateZExt(Val1, Int64Ty);
3414 
3415     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
3416     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
3417     Val = Builder.CreateOr(Val, Val1);
3418     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
3419   }
3420 
       // Remaining (non-64-bit) exclusive loads.  The intrinsic is overloaded on
       // the pointer type; its result is converted back to the builtin's real
       // result type below.
3421   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
3422       BuiltinID == ARM::BI__builtin_arm_ldaex) {
3423     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
3424 
3425     QualType Ty = E->getType();
3426     llvm::Type *RealResTy = ConvertType(Ty);
3427     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
3428                                                   getContext().getTypeSize(Ty));
3429     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
3430 
3431     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
3432                                        ? Intrinsic::arm_ldaex
3433                                        : Intrinsic::arm_ldrex,
3434                                    LoadAddr->getType());
3435     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
3436 
3437     if (RealResTy->isPointerTy())
3438       return Builder.CreateIntToPtr(Val, RealResTy);
3439     else {
3440       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
3441       return Builder.CreateBitCast(Val, RealResTy);
3442     }
3443   }
3444 
       // 64-bit exclusive stores: the value is spilled to a temporary, reloaded
       // as an {i32, i32} struct, and its two elements are passed to the
       // intrinsic together with the i8* store address.
3445   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
3446       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
3447         BuiltinID == ARM::BI__builtin_arm_strex) &&
3448        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
3449     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3450                                        ? Intrinsic::arm_stlexd
3451                                        : Intrinsic::arm_strexd);
3452     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
3453 
3454     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
3455     Value *Val = EmitScalarExpr(E->getArg(0));
3456     Builder.CreateStore(Val, Tmp);
3457 
3458     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
3459     Val = Builder.CreateLoad(LdPtr);
3460 
3461     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
3462     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
3463     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
3464     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
3465   }
3466 
       // Remaining (non-64-bit) exclusive stores.  The stored value is handed to
       // the intrinsic as an i32: pointers via ptrtoint, everything else bitcast
       // to the integer of its own width and then zero-extended.
3467   if (BuiltinID == ARM::BI__builtin_arm_strex ||
3468       BuiltinID == ARM::BI__builtin_arm_stlex) {
3469     Value *StoreVal = EmitScalarExpr(E->getArg(0));
3470     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3471 
3472     QualType Ty = E->getArg(0)->getType();
3473     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3474                                                  getContext().getTypeSize(Ty));
3475     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3476 
3477     if (StoreVal->getType()->isPointerTy())
3478       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3479     else {
3480       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3481       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3482     }
3483 
3484     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3485                                        ? Intrinsic::arm_stlex
3486                                        : Intrinsic::arm_strex,
3487                                    StoreAddr->getType());
3488     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3489   }
3490 
3491   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3492     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3493     return Builder.CreateCall(F);
3494   }
3495 
3496   // CRC32
       // Map the builtin to its intrinsic.  The 64-bit "d" builtins select the
       // 32-bit "w" intrinsic and are expanded into two calls further down.
3497   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3498   switch (BuiltinID) {
3499   case ARM::BI__builtin_arm_crc32b:
3500     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3501   case ARM::BI__builtin_arm_crc32cb:
3502     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3503   case ARM::BI__builtin_arm_crc32h:
3504     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3505   case ARM::BI__builtin_arm_crc32ch:
3506     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3507   case ARM::BI__builtin_arm_crc32w:
3508   case ARM::BI__builtin_arm_crc32d:
3509     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3510   case ARM::BI__builtin_arm_crc32cw:
3511   case ARM::BI__builtin_arm_crc32cd:
3512     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3513   }
3514 
3515   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3516     Value *Arg0 = EmitScalarExpr(E->getArg(0));
3517     Value *Arg1 = EmitScalarExpr(E->getArg(1));
3518 
3519     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3520     // intrinsics, hence we need different codegen for these cases.
3521     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3522         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
           // Arg1a is the low 32 bits of the data, Arg1b the high 32 bits; the
           // result of the first call is the accumulator of the second.
3523       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3524       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3525       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3526       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3527 
3528       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3529       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3530       return Builder.CreateCall(F, {Res, Arg1b});
3531     } else {
3532       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3533 
3534       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3535       return Builder.CreateCall(F, {Arg0, Arg1});
3536     }
3537   }
3538 
       // Special-register reads and writes: pick the register and value types
       // from the builtin flavour and delegate to EmitSpecialRegisterBuiltin.
3539   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
3540       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3541       BuiltinID == ARM::BI__builtin_arm_rsrp ||
3542       BuiltinID == ARM::BI__builtin_arm_wsr ||
3543       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
3544       BuiltinID == ARM::BI__builtin_arm_wsrp) {
3545 
3546     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
3547                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3548                   BuiltinID == ARM::BI__builtin_arm_rsrp;
3549 
3550     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
3551                             BuiltinID == ARM::BI__builtin_arm_wsrp;
3552 
3553     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3554                    BuiltinID == ARM::BI__builtin_arm_wsr64;
3555 
3556     llvm::Type *ValueType;
3557     llvm::Type *RegisterType;
3558     if (IsPointerBuiltin) {
3559       ValueType = VoidPtrTy;
3560       RegisterType = Int32Ty;
3561     } else if (Is64Bit) {
3562       ValueType = RegisterType = Int64Ty;
3563     } else {
3564       ValueType = RegisterType = Int32Ty;
3565     }
3566 
3567     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
3568   }
3569 
3570   // Find out if any arguments are required to be integer constant
3571   // expressions.
3572   unsigned ICEArguments = 0;
3573   ASTContext::GetBuiltinTypeError Error;
3574   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3575   assert(Error == ASTContext::GE_None && "Should not codegen an error");
3576 
       // Helper: the alignment recorded on an Address, as an i32 constant.
3577   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3578     return Builder.getInt32(addr.getAlignment().getQuantity());
3579   };
3580 
       // Evaluate the call arguments into Ops.  When HasExtraNeonArgument()
       // reports an extra argument, the last one is left out here; it is read
       // further down as the constant describing the vector type.
3581   Address PtrOp0 = Address::invalid();
3582   Address PtrOp1 = Address::invalid();
3583   SmallVector<Value*, 4> Ops;
3584   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3585   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3586   for (unsigned i = 0, e = NumArgs; i != e; i++) {
3587     if (i == 0) {
3588       switch (BuiltinID) {
3589       case NEON::BI__builtin_neon_vld1_v:
3590       case NEON::BI__builtin_neon_vld1q_v:
3591       case NEON::BI__builtin_neon_vld1q_lane_v:
3592       case NEON::BI__builtin_neon_vld1_lane_v:
3593       case NEON::BI__builtin_neon_vld1_dup_v:
3594       case NEON::BI__builtin_neon_vld1q_dup_v:
3595       case NEON::BI__builtin_neon_vst1_v:
3596       case NEON::BI__builtin_neon_vst1q_v:
3597       case NEON::BI__builtin_neon_vst1q_lane_v:
3598       case NEON::BI__builtin_neon_vst1_lane_v:
3599       case NEON::BI__builtin_neon_vst2_v:
3600       case NEON::BI__builtin_neon_vst2q_v:
3601       case NEON::BI__builtin_neon_vst2_lane_v:
3602       case NEON::BI__builtin_neon_vst2q_lane_v:
3603       case NEON::BI__builtin_neon_vst3_v:
3604       case NEON::BI__builtin_neon_vst3q_v:
3605       case NEON::BI__builtin_neon_vst3_lane_v:
3606       case NEON::BI__builtin_neon_vst3q_lane_v:
3607       case NEON::BI__builtin_neon_vst4_v:
3608       case NEON::BI__builtin_neon_vst4q_v:
3609       case NEON::BI__builtin_neon_vst4_lane_v:
3610       case NEON::BI__builtin_neon_vst4q_lane_v:
3611         // Get the alignment for the argument in addition to the value;
3612         // we'll use it later.
3613         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3614         Ops.push_back(PtrOp0.getPointer());
3615         continue;
3616       }
3617     }
3618     if (i == 1) {
3619       switch (BuiltinID) {
3620       case NEON::BI__builtin_neon_vld2_v:
3621       case NEON::BI__builtin_neon_vld2q_v:
3622       case NEON::BI__builtin_neon_vld3_v:
3623       case NEON::BI__builtin_neon_vld3q_v:
3624       case NEON::BI__builtin_neon_vld4_v:
3625       case NEON::BI__builtin_neon_vld4q_v:
3626       case NEON::BI__builtin_neon_vld2_lane_v:
3627       case NEON::BI__builtin_neon_vld2q_lane_v:
3628       case NEON::BI__builtin_neon_vld3_lane_v:
3629       case NEON::BI__builtin_neon_vld3q_lane_v:
3630       case NEON::BI__builtin_neon_vld4_lane_v:
3631       case NEON::BI__builtin_neon_vld4q_lane_v:
3632       case NEON::BI__builtin_neon_vld2_dup_v:
3633       case NEON::BI__builtin_neon_vld3_dup_v:
3634       case NEON::BI__builtin_neon_vld4_dup_v:
3635         // Get the alignment for the argument in addition to the value;
3636         // we'll use it later.
3637         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3638         Ops.push_back(PtrOp1.getPointer());
3639         continue;
3640       }
3641     }
3642 
         // Bit i of ICEArguments set means argument i must be an integer
         // constant expression.
3643     if ((ICEArguments & (1 << i)) == 0) {
3644       Ops.push_back(EmitScalarExpr(E->getArg(i)));
3645     } else {
3646       // If this is required to be a constant, constant fold it so that we know
3647       // that the generated intrinsic gets a ConstantInt.
3648       llvm::APSInt Result;
3649       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3650       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3651       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3652     }
3653   }
3654 
       // Builtins handled in this switch return directly and never reach the
       // trailing type-argument handling below.
3655   switch (BuiltinID) {
3656   default: break;
3657 
3658   case NEON::BI__builtin_neon_vget_lane_i8:
3659   case NEON::BI__builtin_neon_vget_lane_i16:
3660   case NEON::BI__builtin_neon_vget_lane_i32:
3661   case NEON::BI__builtin_neon_vget_lane_i64:
3662   case NEON::BI__builtin_neon_vget_lane_f32:
3663   case NEON::BI__builtin_neon_vgetq_lane_i8:
3664   case NEON::BI__builtin_neon_vgetq_lane_i16:
3665   case NEON::BI__builtin_neon_vgetq_lane_i32:
3666   case NEON::BI__builtin_neon_vgetq_lane_i64:
3667   case NEON::BI__builtin_neon_vgetq_lane_f32:
3668     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3669 
3670   case NEON::BI__builtin_neon_vset_lane_i8:
3671   case NEON::BI__builtin_neon_vset_lane_i16:
3672   case NEON::BI__builtin_neon_vset_lane_i32:
3673   case NEON::BI__builtin_neon_vset_lane_i64:
3674   case NEON::BI__builtin_neon_vset_lane_f32:
3675   case NEON::BI__builtin_neon_vsetq_lane_i8:
3676   case NEON::BI__builtin_neon_vsetq_lane_i16:
3677   case NEON::BI__builtin_neon_vsetq_lane_i32:
3678   case NEON::BI__builtin_neon_vsetq_lane_i64:
3679   case NEON::BI__builtin_neon_vsetq_lane_f32:
         // Ops[1] is used as the vector, Ops[0] as the inserted element and
         // Ops[2] as the lane index.
3680     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3681 
3682   case NEON::BI__builtin_neon_vsha1h_u32:
3683     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3684                         "vsha1h");
3685   case NEON::BI__builtin_neon_vsha1cq_u32:
3686     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3687                         "vsha1h");
3688   case NEON::BI__builtin_neon_vsha1pq_u32:
3689     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3690                         "vsha1h");
3691   case NEON::BI__builtin_neon_vsha1mq_u32:
3692     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3693                         "vsha1h");
3694 
3695   // The ARM _MoveToCoprocessor builtins put the input register value as
3696   // the first argument, but the LLVM intrinsic expects it as the third one.
3697   case ARM::BI_MoveToCoprocessor:
3698   case ARM::BI_MoveToCoprocessor2: {
3699     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
3700                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
3701     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3702                                   Ops[3], Ops[4], Ops[5]});
3703   }
3704   }
3705 
3706   // Get the last argument, which specifies the vector type.
3707   assert(HasExtraArg);
3708   llvm::APSInt Result;
3709   const Expr *Arg = E->getArg(E->getNumArgs()-1);
3710   if (!Arg->isIntegerConstantExpr(Result, getContext()))
3711     return nullptr;
3712 
       // For the vcvtr builtins the trailing constant selects the unsigned (1)
       // or signed conversion instead of being decoded as a NEON type.
3713   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3714       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3715     // Determine the overloaded type of this builtin.
3716     llvm::Type *Ty;
3717     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3718       Ty = FloatTy;
3719     else
3720       Ty = DoubleTy;
3721 
3722     // Determine whether this is an unsigned conversion or not.
3723     bool usgn = Result.getZExtValue() == 1;
3724     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3725 
3726     // Call the appropriate intrinsic.
3727     Function *F = CGM.getIntrinsic(Int, Ty);
3728     return Builder.CreateCall(F, Ops, "vcvtr");
3729   }
3730 
3731   // Determine the type of this overloaded NEON intrinsic.
3732   NeonTypeFlags Type(Result.getZExtValue());
3733   bool usgn = Type.isUnsigned();
3734   bool rightShift = false;
3735 
3736   llvm::VectorType *VTy = GetNeonType(this, Type);
3737   llvm::Type *Ty = VTy;
3738   if (!Ty)
3739     return nullptr;
3740 
3741   // Many NEON builtins have identical semantics and uses in ARM and
3742   // AArch64. Emit these in a single function.
3743   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
3744   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
3745       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3746   if (Builtin)
3747     return EmitCommonNeonBuiltinExpr(
3748         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3749         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
3750 
       // Builtins not found in the shared map are emitted case by case here.
3751   unsigned Int;
3752   switch (BuiltinID) {
3753   default: return nullptr;
3754   case NEON::BI__builtin_neon_vld1q_lane_v:
3755     // Handle 64-bit integer elements as a special case.  Use shuffles of
3756     // one-element vectors to avoid poor code for i64 in the backend.
3757     if (VTy->getElementType()->isIntegerTy(64)) {
3758       // Extract the other lane.
3759       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3760       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3761       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3762       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3763       // Load the value as a one-element vector.
3764       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3765       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
3766       Value *Align = getAlignmentValue32(PtrOp0);
3767       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3768       // Combine them.
3769       uint32_t Indices[] = {1 - Lane, Lane};
3770       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
3771       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3772     }
3773     // fall through
3774   case NEON::BI__builtin_neon_vld1_lane_v: {
         // Load one element through PtrOp0 and insert it at lane Ops[2] of the
         // vector in Ops[1].
3775     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3776     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
3777     Value *Ld = Builder.CreateLoad(PtrOp0);
3778     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3779   }
3780   case NEON::BI__builtin_neon_vld2_dup_v:
3781   case NEON::BI__builtin_neon_vld3_dup_v:
3782   case NEON::BI__builtin_neon_vld4_dup_v: {
3783     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
3784     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
3785       switch (BuiltinID) {
3786       case NEON::BI__builtin_neon_vld2_dup_v:
3787         Int = Intrinsic::arm_neon_vld2;
3788         break;
3789       case NEON::BI__builtin_neon_vld3_dup_v:
3790         Int = Intrinsic::arm_neon_vld3;
3791         break;
3792       case NEON::BI__builtin_neon_vld4_dup_v:
3793         Int = Intrinsic::arm_neon_vld4;
3794         break;
3795       default: llvm_unreachable("unknown vld_dup intrinsic?");
3796       }
3797       Function *F = CGM.getIntrinsic(Int, Ty);
3798       llvm::Value *Align = getAlignmentValue32(PtrOp1);
3799       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
           // Store the loaded aggregate through the pointer in Ops[0].
3800       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3801       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3802       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3803     }
3804     switch (BuiltinID) {
3805     case NEON::BI__builtin_neon_vld2_dup_v:
3806       Int = Intrinsic::arm_neon_vld2lane;
3807       break;
3808     case NEON::BI__builtin_neon_vld3_dup_v:
3809       Int = Intrinsic::arm_neon_vld3lane;
3810       break;
3811     case NEON::BI__builtin_neon_vld4_dup_v:
3812       Int = Intrinsic::arm_neon_vld4lane;
3813       break;
3814     default: llvm_unreachable("unknown vld_dup intrinsic?");
3815     }
3816     Function *F = CGM.getIntrinsic(Int, Ty);
3817     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
3818 
         // Arguments of the lane load: the pointer, one undef vector per result
         // element, lane index 0, and the alignment.
3819     SmallVector<Value*, 6> Args;
3820     Args.push_back(Ops[1]);
3821     Args.append(STy->getNumElements(), UndefValue::get(Ty));
3822 
3823     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3824     Args.push_back(CI);
3825     Args.push_back(getAlignmentValue32(PtrOp1));
3826 
3827     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
3828     // splat lane 0 to all elts in each vector of the result.
3829     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3830       Value *Val = Builder.CreateExtractValue(Ops[1], i);
3831       Value *Elt = Builder.CreateBitCast(Val, Ty);
3832       Elt = EmitNeonSplat(Elt, CI);
3833       Elt = Builder.CreateBitCast(Elt, Val->getType());
3834       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
3835     }
         // Store the splatted aggregate through the pointer in Ops[0].
3836     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3837     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3838     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3839   }
3840   case NEON::BI__builtin_neon_vqrshrn_n_v:
3841     Int =
3842       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3843     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3844                         1, true);
3845   case NEON::BI__builtin_neon_vqrshrun_n_v:
3846     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3847                         Ops, "vqrshrun_n", 1, true);
3848   case NEON::BI__builtin_neon_vqshrn_n_v:
3849     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3850     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3851                         1, true);
3852   case NEON::BI__builtin_neon_vqshrun_n_v:
3853     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3854                         Ops, "vqshrun_n", 1, true);
3855   case NEON::BI__builtin_neon_vrecpe_v:
3856   case NEON::BI__builtin_neon_vrecpeq_v:
3857     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3858                         Ops, "vrecpe");
3859   case NEON::BI__builtin_neon_vrshrn_n_v:
3860     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3861                         Ops, "vrshrn_n", 1, true);
3862   case NEON::BI__builtin_neon_vrsra_n_v:
3863   case NEON::BI__builtin_neon_vrsraq_n_v:
         // Apply the vrshift intrinsic to Ops[1] with the shift vector built
         // from Ops[2], then add the result to Ops[0].
3864     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3865     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3866     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3867     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3868     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3869     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3870   case NEON::BI__builtin_neon_vsri_n_v:
3871   case NEON::BI__builtin_neon_vsriq_n_v:
3872     rightShift = true;
         // fall through: vsri shares the vsli code below, with rightShift set.
3873   case NEON::BI__builtin_neon_vsli_n_v:
3874   case NEON::BI__builtin_neon_vsliq_n_v:
3875     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3876     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3877                         Ops, "vsli_n");
3878   case NEON::BI__builtin_neon_vsra_n_v:
3879   case NEON::BI__builtin_neon_vsraq_n_v:
3880     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3881     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3882     return Builder.CreateAdd(Ops[0], Ops[1]);
3883   case NEON::BI__builtin_neon_vst1q_lane_v:
3884     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
3885     // a one-element vector and avoid poor code for i64 in the backend.
3886     if (VTy->getElementType()->isIntegerTy(64)) {
3887       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3888       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3889       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
           // The lane operand is no longer needed; its slot carries the
           // alignment passed to vst1.
3890       Ops[2] = getAlignmentValue32(PtrOp0);
3891       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3892                                                  Ops[1]->getType()), Ops);
3893     }
3894     // fall through
3895   case NEON::BI__builtin_neon_vst1_lane_v: {
         // Extract the selected lane and store it through PtrOp0.
3896     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3897     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3898     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3899     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
3900     return St;
3901   }
3902   case NEON::BI__builtin_neon_vtbl1_v:
3903     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3904                         Ops, "vtbl1");
3905   case NEON::BI__builtin_neon_vtbl2_v:
3906     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3907                         Ops, "vtbl2");
3908   case NEON::BI__builtin_neon_vtbl3_v:
3909     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3910                         Ops, "vtbl3");
3911   case NEON::BI__builtin_neon_vtbl4_v:
3912     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3913                         Ops, "vtbl4");
3914   case NEON::BI__builtin_neon_vtbx1_v:
3915     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3916                         Ops, "vtbx1");
3917   case NEON::BI__builtin_neon_vtbx2_v:
3918     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3919                         Ops, "vtbx2");
3920   case NEON::BI__builtin_neon_vtbx3_v:
3921     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3922                         Ops, "vtbx3");
3923   case NEON::BI__builtin_neon_vtbx4_v:
3924     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3925                         Ops, "vtbx4");
3926   }
3927 }
3928 
3929 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3930                                       const CallExpr *E,
3931                                       SmallVectorImpl<Value *> &Ops) {
3932   unsigned int Int = 0;
3933   const char *s = nullptr;
3934 
3935   switch (BuiltinID) {
3936   default:
3937     return nullptr;
3938   case NEON::BI__builtin_neon_vtbl1_v:
3939   case NEON::BI__builtin_neon_vqtbl1_v:
3940   case NEON::BI__builtin_neon_vqtbl1q_v:
3941   case NEON::BI__builtin_neon_vtbl2_v:
3942   case NEON::BI__builtin_neon_vqtbl2_v:
3943   case NEON::BI__builtin_neon_vqtbl2q_v:
3944   case NEON::BI__builtin_neon_vtbl3_v:
3945   case NEON::BI__builtin_neon_vqtbl3_v:
3946   case NEON::BI__builtin_neon_vqtbl3q_v:
3947   case NEON::BI__builtin_neon_vtbl4_v:
3948   case NEON::BI__builtin_neon_vqtbl4_v:
3949   case NEON::BI__builtin_neon_vqtbl4q_v:
3950     break;
3951   case NEON::BI__builtin_neon_vtbx1_v:
3952   case NEON::BI__builtin_neon_vqtbx1_v:
3953   case NEON::BI__builtin_neon_vqtbx1q_v:
3954   case NEON::BI__builtin_neon_vtbx2_v:
3955   case NEON::BI__builtin_neon_vqtbx2_v:
3956   case NEON::BI__builtin_neon_vqtbx2q_v:
3957   case NEON::BI__builtin_neon_vtbx3_v:
3958   case NEON::BI__builtin_neon_vqtbx3_v:
3959   case NEON::BI__builtin_neon_vqtbx3q_v:
3960   case NEON::BI__builtin_neon_vtbx4_v:
3961   case NEON::BI__builtin_neon_vqtbx4_v:
3962   case NEON::BI__builtin_neon_vqtbx4q_v:
3963     break;
3964   }
3965 
3966   assert(E->getNumArgs() >= 3);
3967 
3968   // Get the last argument, which specifies the vector type.
3969   llvm::APSInt Result;
3970   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3971   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
3972     return nullptr;
3973 
3974   // Determine the type of this overloaded NEON intrinsic.
3975   NeonTypeFlags Type(Result.getZExtValue());
3976   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
3977   if (!Ty)
3978     return nullptr;
3979 
3980   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3981 
3982   // AArch64 scalar builtins are not overloaded, they do not have an extra
3983   // argument that specifies the vector type, need to handle each case.
3984   switch (BuiltinID) {
3985   case NEON::BI__builtin_neon_vtbl1_v: {
3986     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
3987                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
3988                               "vtbl1");
3989   }
3990   case NEON::BI__builtin_neon_vtbl2_v: {
3991     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
3992                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
3993                               "vtbl1");
3994   }
3995   case NEON::BI__builtin_neon_vtbl3_v: {
3996     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
3997                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
3998                               "vtbl2");
3999   }
4000   case NEON::BI__builtin_neon_vtbl4_v: {
4001     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4002                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4003                               "vtbl2");
4004   }
4005   case NEON::BI__builtin_neon_vtbx1_v: {
4006     Value *TblRes =
4007         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4008                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4009 
4010     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4011     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4012     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4013 
4014     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4015     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4016     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4017   }
4018   case NEON::BI__builtin_neon_vtbx2_v: {
4019     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4020                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4021                               "vtbx1");
4022   }
4023   case NEON::BI__builtin_neon_vtbx3_v: {
4024     Value *TblRes =
4025         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4026                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4027 
4028     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4029     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4030                                            TwentyFourV);
4031     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4032 
4033     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4034     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4035     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4036   }
4037   case NEON::BI__builtin_neon_vtbx4_v: {
4038     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4039                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4040                               "vtbx2");
4041   }
4042   case NEON::BI__builtin_neon_vqtbl1_v:
4043   case NEON::BI__builtin_neon_vqtbl1q_v:
4044     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4045   case NEON::BI__builtin_neon_vqtbl2_v:
4046   case NEON::BI__builtin_neon_vqtbl2q_v: {
4047     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4048   case NEON::BI__builtin_neon_vqtbl3_v:
4049   case NEON::BI__builtin_neon_vqtbl3q_v:
4050     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4051   case NEON::BI__builtin_neon_vqtbl4_v:
4052   case NEON::BI__builtin_neon_vqtbl4q_v:
4053     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4054   case NEON::BI__builtin_neon_vqtbx1_v:
4055   case NEON::BI__builtin_neon_vqtbx1q_v:
4056     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4057   case NEON::BI__builtin_neon_vqtbx2_v:
4058   case NEON::BI__builtin_neon_vqtbx2q_v:
4059     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4060   case NEON::BI__builtin_neon_vqtbx3_v:
4061   case NEON::BI__builtin_neon_vqtbx3q_v:
4062     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4063   case NEON::BI__builtin_neon_vqtbx4_v:
4064   case NEON::BI__builtin_neon_vqtbx4q_v:
4065     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4066   }
4067   }
4068 
4069   if (!Int)
4070     return nullptr;
4071 
4072   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4073   return CGF.EmitNeonCall(F, Ops, s);
4074 }
4075 
4076 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4077   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4078   Op = Builder.CreateBitCast(Op, Int16Ty);
4079   Value *V = UndefValue::get(VTy);
4080   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4081   Op = Builder.CreateInsertElement(V, Op, CI);
4082   return Op;
4083 }
4084 
4085 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) {
4086   llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
4087   Op = Builder.CreateBitCast(Op, Int8Ty);
4088   Value *V = UndefValue::get(VTy);
4089   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4090   Op = Builder.CreateInsertElement(V, Op, CI);
4091   return Op;
4092 }
4093 
4094 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4095                                                const CallExpr *E) {
4096   unsigned HintID = static_cast<unsigned>(-1);
4097   switch (BuiltinID) {
4098   default: break;
4099   case AArch64::BI__builtin_arm_nop:
4100     HintID = 0;
4101     break;
4102   case AArch64::BI__builtin_arm_yield:
4103     HintID = 1;
4104     break;
4105   case AArch64::BI__builtin_arm_wfe:
4106     HintID = 2;
4107     break;
4108   case AArch64::BI__builtin_arm_wfi:
4109     HintID = 3;
4110     break;
4111   case AArch64::BI__builtin_arm_sev:
4112     HintID = 4;
4113     break;
4114   case AArch64::BI__builtin_arm_sevl:
4115     HintID = 5;
4116     break;
4117   }
4118 
4119   if (HintID != static_cast<unsigned>(-1)) {
4120     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4121     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4122   }
4123 
4124   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4125     Value *Address         = EmitScalarExpr(E->getArg(0));
4126     Value *RW              = EmitScalarExpr(E->getArg(1));
4127     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4128     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4129     Value *IsData          = EmitScalarExpr(E->getArg(4));
4130 
4131     Value *Locality = nullptr;
4132     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4133       // Temporal fetch, needs to convert cache level to locality.
4134       Locality = llvm::ConstantInt::get(Int32Ty,
4135         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4136     } else {
4137       // Streaming fetch.
4138       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4139     }
4140 
4141     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4142     // PLDL3STRM or PLDL2STRM.
4143     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4144     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4145   }
4146 
4147   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4148     assert((getContext().getTypeSize(E->getType()) == 32) &&
4149            "rbit of unusual size!");
4150     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4151     return Builder.CreateCall(
4152         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4153   }
4154   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4155     assert((getContext().getTypeSize(E->getType()) == 64) &&
4156            "rbit of unusual size!");
4157     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4158     return Builder.CreateCall(
4159         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4160   }
4161 
4162   if (BuiltinID == AArch64::BI__clear_cache) {
4163     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4164     const FunctionDecl *FD = E->getDirectCallee();
4165     Value *Ops[2];
4166     for (unsigned i = 0; i < 2; i++)
4167       Ops[i] = EmitScalarExpr(E->getArg(i));
4168     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4169     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4170     StringRef Name = FD->getName();
4171     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4172   }
4173 
4174   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4175       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4176       getContext().getTypeSize(E->getType()) == 128) {
4177     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4178                                        ? Intrinsic::aarch64_ldaxp
4179                                        : Intrinsic::aarch64_ldxp);
4180 
4181     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4182     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4183                                     "ldxp");
4184 
4185     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4186     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4187     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4188     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4189     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4190 
4191     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4192     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4193     Val = Builder.CreateOr(Val, Val1);
4194     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4195   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4196              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4197     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4198 
4199     QualType Ty = E->getType();
4200     llvm::Type *RealResTy = ConvertType(Ty);
4201     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4202                                                   getContext().getTypeSize(Ty));
4203     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4204 
4205     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4206                                        ? Intrinsic::aarch64_ldaxr
4207                                        : Intrinsic::aarch64_ldxr,
4208                                    LoadAddr->getType());
4209     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4210 
4211     if (RealResTy->isPointerTy())
4212       return Builder.CreateIntToPtr(Val, RealResTy);
4213 
4214     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4215     return Builder.CreateBitCast(Val, RealResTy);
4216   }
4217 
4218   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4219        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4220       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4221     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4222                                        ? Intrinsic::aarch64_stlxp
4223                                        : Intrinsic::aarch64_stxp);
4224     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4225 
4226     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4227     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4228 
4229     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4230     llvm::Value *Val = Builder.CreateLoad(Tmp);
4231 
4232     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4233     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4234     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4235                                          Int8PtrTy);
4236     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4237   }
4238 
4239   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4240       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4241     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4242     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4243 
4244     QualType Ty = E->getArg(0)->getType();
4245     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4246                                                  getContext().getTypeSize(Ty));
4247     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4248 
4249     if (StoreVal->getType()->isPointerTy())
4250       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4251     else {
4252       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4253       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4254     }
4255 
4256     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4257                                        ? Intrinsic::aarch64_stlxr
4258                                        : Intrinsic::aarch64_stxr,
4259                                    StoreAddr->getType());
4260     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4261   }
4262 
4263   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4264     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4265     return Builder.CreateCall(F);
4266   }
4267 
4268   if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
4269     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
4270     return Builder.CreateCall(F);
4271   }
4272 
4273   // CRC32
4274   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4275   switch (BuiltinID) {
4276   case AArch64::BI__builtin_arm_crc32b:
4277     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4278   case AArch64::BI__builtin_arm_crc32cb:
4279     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4280   case AArch64::BI__builtin_arm_crc32h:
4281     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4282   case AArch64::BI__builtin_arm_crc32ch:
4283     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4284   case AArch64::BI__builtin_arm_crc32w:
4285     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4286   case AArch64::BI__builtin_arm_crc32cw:
4287     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4288   case AArch64::BI__builtin_arm_crc32d:
4289     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4290   case AArch64::BI__builtin_arm_crc32cd:
4291     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4292   }
4293 
4294   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4295     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4296     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4297     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4298 
4299     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4300     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4301 
4302     return Builder.CreateCall(F, {Arg0, Arg1});
4303   }
4304 
4305   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4306       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4307       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4308       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4309       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4310       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4311 
4312     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4313                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4314                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
4315 
4316     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4317                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
4318 
4319     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
4320                    BuiltinID != AArch64::BI__builtin_arm_wsr;
4321 
4322     llvm::Type *ValueType;
4323     llvm::Type *RegisterType = Int64Ty;
4324     if (IsPointerBuiltin) {
4325       ValueType = VoidPtrTy;
4326     } else if (Is64Bit) {
4327       ValueType = Int64Ty;
4328     } else {
4329       ValueType = Int32Ty;
4330     }
4331 
4332     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4333   }
4334 
4335   // Find out if any arguments are required to be integer constant
4336   // expressions.
4337   unsigned ICEArguments = 0;
4338   ASTContext::GetBuiltinTypeError Error;
4339   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4340   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4341 
4342   llvm::SmallVector<Value*, 4> Ops;
4343   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
4344     if ((ICEArguments & (1 << i)) == 0) {
4345       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4346     } else {
4347       // If this is required to be a constant, constant fold it so that we know
4348       // that the generated intrinsic gets a ConstantInt.
4349       llvm::APSInt Result;
4350       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4351       assert(IsConst && "Constant arg isn't actually constant?");
4352       (void)IsConst;
4353       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4354     }
4355   }
4356 
4357   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
4358   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4359       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
4360 
4361   if (Builtin) {
4362     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4363     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4364     assert(Result && "SISD intrinsic should have been handled");
4365     return Result;
4366   }
4367 
4368   llvm::APSInt Result;
4369   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4370   NeonTypeFlags Type(0);
4371   if (Arg->isIntegerConstantExpr(Result, getContext()))
4372     // Determine the type of this overloaded NEON intrinsic.
4373     Type = NeonTypeFlags(Result.getZExtValue());
4374 
4375   bool usgn = Type.isUnsigned();
4376   bool quad = Type.isQuad();
4377 
4378   // Handle non-overloaded intrinsics first.
4379   switch (BuiltinID) {
4380   default: break;
4381   case NEON::BI__builtin_neon_vldrq_p128: {
4382     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4383     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4384     return Builder.CreateDefaultAlignedLoad(Ptr);
4385   }
4386   case NEON::BI__builtin_neon_vstrq_p128: {
4387     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4388     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4389     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
4390   }
4391   case NEON::BI__builtin_neon_vcvts_u32_f32:
4392   case NEON::BI__builtin_neon_vcvtd_u64_f64:
4393     usgn = true;
4394     // FALL THROUGH
4395   case NEON::BI__builtin_neon_vcvts_s32_f32:
4396   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4397     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4398     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4399     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4400     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4401     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4402     if (usgn)
4403       return Builder.CreateFPToUI(Ops[0], InTy);
4404     return Builder.CreateFPToSI(Ops[0], InTy);
4405   }
4406   case NEON::BI__builtin_neon_vcvts_f32_u32:
4407   case NEON::BI__builtin_neon_vcvtd_f64_u64:
4408     usgn = true;
4409     // FALL THROUGH
4410   case NEON::BI__builtin_neon_vcvts_f32_s32:
4411   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4412     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4413     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4414     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4415     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4416     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4417     if (usgn)
4418       return Builder.CreateUIToFP(Ops[0], FTy);
4419     return Builder.CreateSIToFP(Ops[0], FTy);
4420   }
4421   case NEON::BI__builtin_neon_vpaddd_s64: {
4422     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
4423     Value *Vec = EmitScalarExpr(E->getArg(0));
4424     // The vector is v2f64, so make sure it's bitcast to that.
4425     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4426     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4427     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4428     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4429     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4430     // Pairwise addition of a v2f64 into a scalar f64.
4431     return Builder.CreateAdd(Op0, Op1, "vpaddd");
4432   }
4433   case NEON::BI__builtin_neon_vpaddd_f64: {
4434     llvm::Type *Ty =
4435       llvm::VectorType::get(DoubleTy, 2);
4436     Value *Vec = EmitScalarExpr(E->getArg(0));
4437     // The vector is v2f64, so make sure it's bitcast to that.
4438     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
4439     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4440     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4441     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4442     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4443     // Pairwise addition of a v2f64 into a scalar f64.
4444     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4445   }
4446   case NEON::BI__builtin_neon_vpadds_f32: {
4447     llvm::Type *Ty =
4448       llvm::VectorType::get(FloatTy, 2);
4449     Value *Vec = EmitScalarExpr(E->getArg(0));
4450     // The vector is v2f32, so make sure it's bitcast to that.
4451     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
4452     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4453     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4454     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4455     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4456     // Pairwise addition of a v2f32 into a scalar f32.
4457     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4458   }
4459   case NEON::BI__builtin_neon_vceqzd_s64:
4460   case NEON::BI__builtin_neon_vceqzd_f64:
4461   case NEON::BI__builtin_neon_vceqzs_f32:
4462     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4463     return EmitAArch64CompareBuiltinExpr(
4464         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4465         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
4466   case NEON::BI__builtin_neon_vcgezd_s64:
4467   case NEON::BI__builtin_neon_vcgezd_f64:
4468   case NEON::BI__builtin_neon_vcgezs_f32:
4469     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4470     return EmitAArch64CompareBuiltinExpr(
4471         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4472         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
4473   case NEON::BI__builtin_neon_vclezd_s64:
4474   case NEON::BI__builtin_neon_vclezd_f64:
4475   case NEON::BI__builtin_neon_vclezs_f32:
4476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4477     return EmitAArch64CompareBuiltinExpr(
4478         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4479         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
4480   case NEON::BI__builtin_neon_vcgtzd_s64:
4481   case NEON::BI__builtin_neon_vcgtzd_f64:
4482   case NEON::BI__builtin_neon_vcgtzs_f32:
4483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4484     return EmitAArch64CompareBuiltinExpr(
4485         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4486         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
4487   case NEON::BI__builtin_neon_vcltzd_s64:
4488   case NEON::BI__builtin_neon_vcltzd_f64:
4489   case NEON::BI__builtin_neon_vcltzs_f32:
4490     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4491     return EmitAArch64CompareBuiltinExpr(
4492         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4493         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
4494 
4495   case NEON::BI__builtin_neon_vceqzd_u64: {
4496     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4497     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4498     Ops[0] =
4499         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
4500     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
4501   }
4502   case NEON::BI__builtin_neon_vceqd_f64:
4503   case NEON::BI__builtin_neon_vcled_f64:
4504   case NEON::BI__builtin_neon_vcltd_f64:
4505   case NEON::BI__builtin_neon_vcged_f64:
4506   case NEON::BI__builtin_neon_vcgtd_f64: {
4507     llvm::CmpInst::Predicate P;
4508     switch (BuiltinID) {
4509     default: llvm_unreachable("missing builtin ID in switch!");
4510     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
4511     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
4512     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
4513     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
4514     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
4515     }
4516     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4517     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4518     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4519     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4520     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
4521   }
4522   case NEON::BI__builtin_neon_vceqs_f32:
4523   case NEON::BI__builtin_neon_vcles_f32:
4524   case NEON::BI__builtin_neon_vclts_f32:
4525   case NEON::BI__builtin_neon_vcges_f32:
4526   case NEON::BI__builtin_neon_vcgts_f32: {
4527     llvm::CmpInst::Predicate P;
4528     switch (BuiltinID) {
4529     default: llvm_unreachable("missing builtin ID in switch!");
4530     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
4531     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
4532     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
4533     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
4534     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
4535     }
4536     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4537     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
4538     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
4539     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4540     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
4541   }
4542   case NEON::BI__builtin_neon_vceqd_s64:
4543   case NEON::BI__builtin_neon_vceqd_u64:
4544   case NEON::BI__builtin_neon_vcgtd_s64:
4545   case NEON::BI__builtin_neon_vcgtd_u64:
4546   case NEON::BI__builtin_neon_vcltd_s64:
4547   case NEON::BI__builtin_neon_vcltd_u64:
4548   case NEON::BI__builtin_neon_vcged_u64:
4549   case NEON::BI__builtin_neon_vcged_s64:
4550   case NEON::BI__builtin_neon_vcled_u64:
4551   case NEON::BI__builtin_neon_vcled_s64: {
4552     llvm::CmpInst::Predicate P;
4553     switch (BuiltinID) {
4554     default: llvm_unreachable("missing builtin ID in switch!");
4555     case NEON::BI__builtin_neon_vceqd_s64:
4556     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
4557     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
4558     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
4559     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
4560     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
4561     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
4562     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
4563     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
4564     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
4565     }
4566     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4567     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4568     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4569     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
4570     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
4571   }
4572   case NEON::BI__builtin_neon_vtstd_s64:
4573   case NEON::BI__builtin_neon_vtstd_u64: {
4574     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4575     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4576     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4577     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4578     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4579                                 llvm::Constant::getNullValue(Int64Ty));
4580     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
4581   }
4582   case NEON::BI__builtin_neon_vset_lane_i8:
4583   case NEON::BI__builtin_neon_vset_lane_i16:
4584   case NEON::BI__builtin_neon_vset_lane_i32:
4585   case NEON::BI__builtin_neon_vset_lane_i64:
4586   case NEON::BI__builtin_neon_vset_lane_f32:
4587   case NEON::BI__builtin_neon_vsetq_lane_i8:
4588   case NEON::BI__builtin_neon_vsetq_lane_i16:
4589   case NEON::BI__builtin_neon_vsetq_lane_i32:
4590   case NEON::BI__builtin_neon_vsetq_lane_i64:
4591   case NEON::BI__builtin_neon_vsetq_lane_f32:
4592     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4593     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4594   case NEON::BI__builtin_neon_vset_lane_f64:
4595     // The vector type needs a cast for the v1f64 variant.
4596     Ops[1] = Builder.CreateBitCast(Ops[1],
4597                                    llvm::VectorType::get(DoubleTy, 1));
4598     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4599     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4600   case NEON::BI__builtin_neon_vsetq_lane_f64:
4601     // The vector type needs a cast for the v2f64 variant.
4602     Ops[1] = Builder.CreateBitCast(Ops[1],
4603         llvm::VectorType::get(DoubleTy, 2));
4604     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4605     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4606 
4607   case NEON::BI__builtin_neon_vget_lane_i8:
4608   case NEON::BI__builtin_neon_vdupb_lane_i8:
4609     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
4610     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4611                                         "vget_lane");
4612   case NEON::BI__builtin_neon_vgetq_lane_i8:
4613   case NEON::BI__builtin_neon_vdupb_laneq_i8:
4614     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
4615     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4616                                         "vgetq_lane");
4617   case NEON::BI__builtin_neon_vget_lane_i16:
4618   case NEON::BI__builtin_neon_vduph_lane_i16:
4619     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
4620     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4621                                         "vget_lane");
4622   case NEON::BI__builtin_neon_vgetq_lane_i16:
4623   case NEON::BI__builtin_neon_vduph_laneq_i16:
4624     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
4625     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4626                                         "vgetq_lane");
4627   case NEON::BI__builtin_neon_vget_lane_i32:
4628   case NEON::BI__builtin_neon_vdups_lane_i32:
4629     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
4630     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4631                                         "vget_lane");
4632   case NEON::BI__builtin_neon_vdups_lane_f32:
4633     Ops[0] = Builder.CreateBitCast(Ops[0],
4634         llvm::VectorType::get(FloatTy, 2));
4635     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4636                                         "vdups_lane");
4637   case NEON::BI__builtin_neon_vgetq_lane_i32:
4638   case NEON::BI__builtin_neon_vdups_laneq_i32:
4639     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
4640     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4641                                         "vgetq_lane");
4642   case NEON::BI__builtin_neon_vget_lane_i64:
4643   case NEON::BI__builtin_neon_vdupd_lane_i64:
4644     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
4645     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4646                                         "vget_lane");
4647   case NEON::BI__builtin_neon_vdupd_lane_f64:
4648     Ops[0] = Builder.CreateBitCast(Ops[0],
4649         llvm::VectorType::get(DoubleTy, 1));
4650     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4651                                         "vdupd_lane");
4652   case NEON::BI__builtin_neon_vgetq_lane_i64:
4653   case NEON::BI__builtin_neon_vdupd_laneq_i64:
4654     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
4655     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4656                                         "vgetq_lane");
4657   case NEON::BI__builtin_neon_vget_lane_f32:
4658     Ops[0] = Builder.CreateBitCast(Ops[0],
4659         llvm::VectorType::get(FloatTy, 2));
4660     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4661                                         "vget_lane");
4662   case NEON::BI__builtin_neon_vget_lane_f64:
4663     Ops[0] = Builder.CreateBitCast(Ops[0],
4664         llvm::VectorType::get(DoubleTy, 1));
4665     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4666                                         "vget_lane");
4667   case NEON::BI__builtin_neon_vgetq_lane_f32:
4668   case NEON::BI__builtin_neon_vdups_laneq_f32:
4669     Ops[0] = Builder.CreateBitCast(Ops[0],
4670         llvm::VectorType::get(FloatTy, 4));
4671     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4672                                         "vgetq_lane");
4673   case NEON::BI__builtin_neon_vgetq_lane_f64:
4674   case NEON::BI__builtin_neon_vdupd_laneq_f64:
4675     Ops[0] = Builder.CreateBitCast(Ops[0],
4676         llvm::VectorType::get(DoubleTy, 2));
4677     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4678                                         "vgetq_lane");
4679   case NEON::BI__builtin_neon_vaddd_s64:
4680   case NEON::BI__builtin_neon_vaddd_u64:
4681     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
4682   case NEON::BI__builtin_neon_vsubd_s64:
4683   case NEON::BI__builtin_neon_vsubd_u64:
4684     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
4685   case NEON::BI__builtin_neon_vqdmlalh_s16:
4686   case NEON::BI__builtin_neon_vqdmlslh_s16: {
4687     SmallVector<Value *, 2> ProductOps;
4688     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4689     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
4690     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4691     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4692                           ProductOps, "vqdmlXl");
4693     Constant *CI = ConstantInt::get(SizeTy, 0);
4694     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4695 
4696     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
4697                                         ? Intrinsic::aarch64_neon_sqadd
4698                                         : Intrinsic::aarch64_neon_sqsub;
4699     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
4700   }
4701   case NEON::BI__builtin_neon_vqshlud_n_s64: {
4702     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4703     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4704     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
4705                         Ops, "vqshlu_n");
4706   }
4707   case NEON::BI__builtin_neon_vqshld_n_u64:
4708   case NEON::BI__builtin_neon_vqshld_n_s64: {
4709     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
4710                                    ? Intrinsic::aarch64_neon_uqshl
4711                                    : Intrinsic::aarch64_neon_sqshl;
4712     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4713     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4714     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
4715   }
4716   case NEON::BI__builtin_neon_vrshrd_n_u64:
4717   case NEON::BI__builtin_neon_vrshrd_n_s64: {
4718     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
4719                                    ? Intrinsic::aarch64_neon_urshl
4720                                    : Intrinsic::aarch64_neon_srshl;
4721     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4722     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
4723     Ops[1] = ConstantInt::get(Int64Ty, -SV);
4724     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
4725   }
4726   case NEON::BI__builtin_neon_vrsrad_n_u64:
4727   case NEON::BI__builtin_neon_vrsrad_n_s64: {
4728     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
4729                                    ? Intrinsic::aarch64_neon_urshl
4730                                    : Intrinsic::aarch64_neon_srshl;
4731     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4732     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
4733     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
4734                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
4735     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
4736   }
4737   case NEON::BI__builtin_neon_vshld_n_s64:
4738   case NEON::BI__builtin_neon_vshld_n_u64: {
4739     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4740     return Builder.CreateShl(
4741         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
4742   }
4743   case NEON::BI__builtin_neon_vshrd_n_s64: {
4744     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4745     return Builder.CreateAShr(
4746         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4747                                                    Amt->getZExtValue())),
4748         "shrd_n");
4749   }
4750   case NEON::BI__builtin_neon_vshrd_n_u64: {
4751     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4752     uint64_t ShiftAmt = Amt->getZExtValue();
4753     // Right-shifting an unsigned value by its size yields 0.
4754     if (ShiftAmt == 64)
4755       return ConstantInt::get(Int64Ty, 0);
4756     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
4757                               "shrd_n");
4758   }
4759   case NEON::BI__builtin_neon_vsrad_n_s64: {
4760     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4761     Ops[1] = Builder.CreateAShr(
4762         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4763                                                    Amt->getZExtValue())),
4764         "shrd_n");
4765     return Builder.CreateAdd(Ops[0], Ops[1]);
4766   }
4767   case NEON::BI__builtin_neon_vsrad_n_u64: {
4768     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4769     uint64_t ShiftAmt = Amt->getZExtValue();
4770     // Right-shifting an unsigned value by its size yields 0.
4771     // As Op + 0 = Op, return Ops[0] directly.
4772     if (ShiftAmt == 64)
4773       return Ops[0];
4774     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
4775                                 "shrd_n");
4776     return Builder.CreateAdd(Ops[0], Ops[1]);
4777   }
4778   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
4779   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
4780   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
4781   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
4782     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4783                                           "lane");
4784     SmallVector<Value *, 2> ProductOps;
4785     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4786     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
4787     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4788     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4789                           ProductOps, "vqdmlXl");
4790     Constant *CI = ConstantInt::get(SizeTy, 0);
4791     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4792     Ops.pop_back();
4793 
4794     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
4795                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
4796                           ? Intrinsic::aarch64_neon_sqadd
4797                           : Intrinsic::aarch64_neon_sqsub;
4798     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
4799   }
4800   case NEON::BI__builtin_neon_vqdmlals_s32:
4801   case NEON::BI__builtin_neon_vqdmlsls_s32: {
4802     SmallVector<Value *, 2> ProductOps;
4803     ProductOps.push_back(Ops[1]);
4804     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
4805     Ops[1] =
4806         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
4807                      ProductOps, "vqdmlXl");
4808 
4809     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
4810                                         ? Intrinsic::aarch64_neon_sqadd
4811                                         : Intrinsic::aarch64_neon_sqsub;
4812     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
4813   }
4814   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
4815   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
4816   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
4817   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
4818     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4819                                           "lane");
4820     SmallVector<Value *, 2> ProductOps;
4821     ProductOps.push_back(Ops[1]);
4822     ProductOps.push_back(Ops[2]);
4823     Ops[1] =
4824         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
4825                      ProductOps, "vqdmlXl");
4826     Ops.pop_back();
4827 
4828     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
4829                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
4830                           ? Intrinsic::aarch64_neon_sqadd
4831                           : Intrinsic::aarch64_neon_sqsub;
4832     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
4833   }
4834   }
4835 
4836   llvm::VectorType *VTy = GetNeonType(this, Type);
4837   llvm::Type *Ty = VTy;
4838   if (!Ty)
4839     return nullptr;
4840 
4841   // Not all intrinsics handled by the common case work for AArch64 yet, so only
4842   // defer to common code if it's been added to our special map.
4843   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
4844                                    AArch64SIMDIntrinsicsProvenSorted);
4845 
4846   if (Builtin)
4847     return EmitCommonNeonBuiltinExpr(
4848         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4849         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
4850         /*never use addresses*/ Address::invalid(), Address::invalid());
4851 
4852   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
4853     return V;
4854 
4855   unsigned Int;
4856   switch (BuiltinID) {
4857   default: return nullptr;
4858   case NEON::BI__builtin_neon_vbsl_v:
4859   case NEON::BI__builtin_neon_vbslq_v: {
4860     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
4861     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
4862     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
4863     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
4864 
4865     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
4866     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
4867     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
4868     return Builder.CreateBitCast(Ops[0], Ty);
4869   }
4870   case NEON::BI__builtin_neon_vfma_lane_v:
4871   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
4872     // The ARM builtins (and instructions) have the addend as the first
4873     // operand, but the 'fma' intrinsics have it last. Swap it around here.
4874     Value *Addend = Ops[0];
4875     Value *Multiplicand = Ops[1];
4876     Value *LaneSource = Ops[2];
4877     Ops[0] = Multiplicand;
4878     Ops[1] = LaneSource;
4879     Ops[2] = Addend;
4880 
4881     // Now adjust things to handle the lane access.
4882     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
4883       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
4884       VTy;
4885     llvm::Constant *cst = cast<Constant>(Ops[3]);
4886     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
4887     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
4888     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
4889 
4890     Ops.pop_back();
4891     Int = Intrinsic::fma;
4892     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
4893   }
4894   case NEON::BI__builtin_neon_vfma_laneq_v: {
4895     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4896     // v1f64 fma should be mapped to Neon scalar f64 fma
4897     if (VTy && VTy->getElementType() == DoubleTy) {
4898       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4899       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4900       llvm::Type *VTy = GetNeonType(this,
4901         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
4902       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
4903       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
4904       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
4905       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4906       return Builder.CreateBitCast(Result, Ty);
4907     }
4908     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4909     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4910     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4911 
4912     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
4913                                             VTy->getNumElements() * 2);
4914     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
4915     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
4916                                                cast<ConstantInt>(Ops[3]));
4917     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
4918 
4919     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
4920   }
4921   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
4922     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4923     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4925 
4926     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4927     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
4928     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
4929   }
4930   case NEON::BI__builtin_neon_vfmas_lane_f32:
4931   case NEON::BI__builtin_neon_vfmas_laneq_f32:
4932   case NEON::BI__builtin_neon_vfmad_lane_f64:
4933   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
4934     Ops.push_back(EmitScalarExpr(E->getArg(3)));
4935     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4936     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4937     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
4938     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4939   }
4940   case NEON::BI__builtin_neon_vfms_v:
4941   case NEON::BI__builtin_neon_vfmsq_v: {  // Only used for FP types
4942     // FIXME: probably remove when we no longer support aarch64_simd.h
4943     // (arm_neon.h delegates to vfma).
4944 
4945     // The ARM builtins (and instructions) have the addend as the first
4946     // operand, but the 'fma' intrinsics have it last. Swap it around here.
4947     Value *Subtrahend = Ops[0];
4948     Value *Multiplicand = Ops[2];
4949     Ops[0] = Multiplicand;
4950     Ops[2] = Subtrahend;
4951     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
4952     Ops[1] = Builder.CreateFNeg(Ops[1]);
4953     Int = Intrinsic::fma;
4954     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
4955   }
4956   case NEON::BI__builtin_neon_vmull_v:
4957     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4958     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
4959     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
4960     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4961   case NEON::BI__builtin_neon_vmax_v:
4962   case NEON::BI__builtin_neon_vmaxq_v:
4963     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4964     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
4965     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
4966     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
4967   case NEON::BI__builtin_neon_vmin_v:
4968   case NEON::BI__builtin_neon_vminq_v:
4969     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4970     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
4971     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
4972     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
4973   case NEON::BI__builtin_neon_vabd_v:
4974   case NEON::BI__builtin_neon_vabdq_v:
4975     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4976     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
4977     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
4978     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
4979   case NEON::BI__builtin_neon_vpadal_v:
4980   case NEON::BI__builtin_neon_vpadalq_v: {
4981     unsigned ArgElts = VTy->getNumElements();
4982     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
4983     unsigned BitWidth = EltTy->getBitWidth();
4984     llvm::Type *ArgTy = llvm::VectorType::get(
4985         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
4986     llvm::Type* Tys[2] = { VTy, ArgTy };
4987     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
4988     SmallVector<llvm::Value*, 1> TmpOps;
4989     TmpOps.push_back(Ops[1]);
4990     Function *F = CGM.getIntrinsic(Int, Tys);
4991     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
4992     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
4993     return Builder.CreateAdd(tmp, addend);
4994   }
4995   case NEON::BI__builtin_neon_vpmin_v:
4996   case NEON::BI__builtin_neon_vpminq_v:
4997     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4998     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
4999     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5000     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5001   case NEON::BI__builtin_neon_vpmax_v:
5002   case NEON::BI__builtin_neon_vpmaxq_v:
5003     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5004     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5005     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5006     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5007   case NEON::BI__builtin_neon_vminnm_v:
5008   case NEON::BI__builtin_neon_vminnmq_v:
5009     Int = Intrinsic::aarch64_neon_fminnm;
5010     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5011   case NEON::BI__builtin_neon_vmaxnm_v:
5012   case NEON::BI__builtin_neon_vmaxnmq_v:
5013     Int = Intrinsic::aarch64_neon_fmaxnm;
5014     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5015   case NEON::BI__builtin_neon_vrecpss_f32: {
5016     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5017     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5018                         Ops, "vrecps");
5019   }
5020   case NEON::BI__builtin_neon_vrecpsd_f64: {
5021     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5022     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5023                         Ops, "vrecps");
5024   }
5025   case NEON::BI__builtin_neon_vqshrun_n_v:
5026     Int = Intrinsic::aarch64_neon_sqshrun;
5027     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5028   case NEON::BI__builtin_neon_vqrshrun_n_v:
5029     Int = Intrinsic::aarch64_neon_sqrshrun;
5030     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5031   case NEON::BI__builtin_neon_vqshrn_n_v:
5032     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5033     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5034   case NEON::BI__builtin_neon_vrshrn_n_v:
5035     Int = Intrinsic::aarch64_neon_rshrn;
5036     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5037   case NEON::BI__builtin_neon_vqrshrn_n_v:
5038     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5039     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5040   case NEON::BI__builtin_neon_vrnda_v:
5041   case NEON::BI__builtin_neon_vrndaq_v: {
5042     Int = Intrinsic::round;
5043     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5044   }
5045   case NEON::BI__builtin_neon_vrndi_v:
5046   case NEON::BI__builtin_neon_vrndiq_v: {
5047     Int = Intrinsic::nearbyint;
5048     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5049   }
5050   case NEON::BI__builtin_neon_vrndm_v:
5051   case NEON::BI__builtin_neon_vrndmq_v: {
5052     Int = Intrinsic::floor;
5053     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5054   }
5055   case NEON::BI__builtin_neon_vrndn_v:
5056   case NEON::BI__builtin_neon_vrndnq_v: {
5057     Int = Intrinsic::aarch64_neon_frintn;
5058     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5059   }
5060   case NEON::BI__builtin_neon_vrndp_v:
5061   case NEON::BI__builtin_neon_vrndpq_v: {
5062     Int = Intrinsic::ceil;
5063     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5064   }
5065   case NEON::BI__builtin_neon_vrndx_v:
5066   case NEON::BI__builtin_neon_vrndxq_v: {
5067     Int = Intrinsic::rint;
5068     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5069   }
5070   case NEON::BI__builtin_neon_vrnd_v:
5071   case NEON::BI__builtin_neon_vrndq_v: {
5072     Int = Intrinsic::trunc;
5073     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5074   }
5075   case NEON::BI__builtin_neon_vceqz_v:
5076   case NEON::BI__builtin_neon_vceqzq_v:
5077     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5078                                          ICmpInst::ICMP_EQ, "vceqz");
5079   case NEON::BI__builtin_neon_vcgez_v:
5080   case NEON::BI__builtin_neon_vcgezq_v:
5081     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5082                                          ICmpInst::ICMP_SGE, "vcgez");
5083   case NEON::BI__builtin_neon_vclez_v:
5084   case NEON::BI__builtin_neon_vclezq_v:
5085     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5086                                          ICmpInst::ICMP_SLE, "vclez");
5087   case NEON::BI__builtin_neon_vcgtz_v:
5088   case NEON::BI__builtin_neon_vcgtzq_v:
5089     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5090                                          ICmpInst::ICMP_SGT, "vcgtz");
5091   case NEON::BI__builtin_neon_vcltz_v:
5092   case NEON::BI__builtin_neon_vcltzq_v:
5093     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5094                                          ICmpInst::ICMP_SLT, "vcltz");
5095   case NEON::BI__builtin_neon_vcvt_f64_v:
5096   case NEON::BI__builtin_neon_vcvtq_f64_v:
5097     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5098     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5099     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5100                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5101   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5102     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5103            "unexpected vcvt_f64_f32 builtin");
5104     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5105     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5106 
5107     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5108   }
5109   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5110     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5111            "unexpected vcvt_f32_f64 builtin");
5112     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5113     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5114 
5115     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5116   }
5117   case NEON::BI__builtin_neon_vcvt_s32_v:
5118   case NEON::BI__builtin_neon_vcvt_u32_v:
5119   case NEON::BI__builtin_neon_vcvt_s64_v:
5120   case NEON::BI__builtin_neon_vcvt_u64_v:
5121   case NEON::BI__builtin_neon_vcvtq_s32_v:
5122   case NEON::BI__builtin_neon_vcvtq_u32_v:
5123   case NEON::BI__builtin_neon_vcvtq_s64_v:
5124   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5125     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5126     if (usgn)
5127       return Builder.CreateFPToUI(Ops[0], Ty);
5128     return Builder.CreateFPToSI(Ops[0], Ty);
5129   }
5130   case NEON::BI__builtin_neon_vcvta_s32_v:
5131   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5132   case NEON::BI__builtin_neon_vcvta_u32_v:
5133   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5134   case NEON::BI__builtin_neon_vcvta_s64_v:
5135   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5136   case NEON::BI__builtin_neon_vcvta_u64_v:
5137   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5138     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5139     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5140     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5141   }
5142   case NEON::BI__builtin_neon_vcvtm_s32_v:
5143   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5144   case NEON::BI__builtin_neon_vcvtm_u32_v:
5145   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5146   case NEON::BI__builtin_neon_vcvtm_s64_v:
5147   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5148   case NEON::BI__builtin_neon_vcvtm_u64_v:
5149   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5150     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5151     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5152     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5153   }
5154   case NEON::BI__builtin_neon_vcvtn_s32_v:
5155   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5156   case NEON::BI__builtin_neon_vcvtn_u32_v:
5157   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5158   case NEON::BI__builtin_neon_vcvtn_s64_v:
5159   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5160   case NEON::BI__builtin_neon_vcvtn_u64_v:
5161   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5162     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5163     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5164     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5165   }
5166   case NEON::BI__builtin_neon_vcvtp_s32_v:
5167   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5168   case NEON::BI__builtin_neon_vcvtp_u32_v:
5169   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5170   case NEON::BI__builtin_neon_vcvtp_s64_v:
5171   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5172   case NEON::BI__builtin_neon_vcvtp_u64_v:
5173   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5174     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5175     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5176     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5177   }
5178   case NEON::BI__builtin_neon_vmulx_v:
5179   case NEON::BI__builtin_neon_vmulxq_v: {
5180     Int = Intrinsic::aarch64_neon_fmulx;
5181     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5182   }
5183   case NEON::BI__builtin_neon_vmul_lane_v:
5184   case NEON::BI__builtin_neon_vmul_laneq_v: {
5185     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5186     bool Quad = false;
5187     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5188       Quad = true;
5189     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5190     llvm::Type *VTy = GetNeonType(this,
5191       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5192     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5193     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5194     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5195     return Builder.CreateBitCast(Result, Ty);
5196   }
5197   case NEON::BI__builtin_neon_vnegd_s64:
5198     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5199   case NEON::BI__builtin_neon_vpmaxnm_v:
5200   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5201     Int = Intrinsic::aarch64_neon_fmaxnmp;
5202     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5203   }
5204   case NEON::BI__builtin_neon_vpminnm_v:
5205   case NEON::BI__builtin_neon_vpminnmq_v: {
5206     Int = Intrinsic::aarch64_neon_fminnmp;
5207     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5208   }
5209   case NEON::BI__builtin_neon_vsqrt_v:
5210   case NEON::BI__builtin_neon_vsqrtq_v: {
5211     Int = Intrinsic::sqrt;
5212     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5213     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5214   }
5215   case NEON::BI__builtin_neon_vrbit_v:
5216   case NEON::BI__builtin_neon_vrbitq_v: {
5217     Int = Intrinsic::aarch64_neon_rbit;
5218     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5219   }
5220   case NEON::BI__builtin_neon_vaddv_u8:
5221     // FIXME: These are handled by the AArch64 scalar code.
5222     usgn = true;
5223     // FALLTHROUGH
5224   case NEON::BI__builtin_neon_vaddv_s8: {
5225     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5226     Ty = Int32Ty;
5227     VTy = llvm::VectorType::get(Int8Ty, 8);
5228     llvm::Type *Tys[2] = { Ty, VTy };
5229     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5230     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5231     return Builder.CreateTrunc(Ops[0], Int8Ty);
5232   }
5233   case NEON::BI__builtin_neon_vaddv_u16:
5234     usgn = true;
5235     // FALLTHROUGH
5236   case NEON::BI__builtin_neon_vaddv_s16: {
5237     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5238     Ty = Int32Ty;
5239     VTy = llvm::VectorType::get(Int16Ty, 4);
5240     llvm::Type *Tys[2] = { Ty, VTy };
5241     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5242     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5243     return Builder.CreateTrunc(Ops[0], Int16Ty);
5244   }
5245   case NEON::BI__builtin_neon_vaddvq_u8:
5246     usgn = true;
5247     // FALLTHROUGH
5248   case NEON::BI__builtin_neon_vaddvq_s8: {
5249     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5250     Ty = Int32Ty;
5251     VTy = llvm::VectorType::get(Int8Ty, 16);
5252     llvm::Type *Tys[2] = { Ty, VTy };
5253     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5254     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5255     return Builder.CreateTrunc(Ops[0], Int8Ty);
5256   }
5257   case NEON::BI__builtin_neon_vaddvq_u16:
5258     usgn = true;
5259     // FALLTHROUGH
5260   case NEON::BI__builtin_neon_vaddvq_s16: {
5261     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5262     Ty = Int32Ty;
5263     VTy = llvm::VectorType::get(Int16Ty, 8);
5264     llvm::Type *Tys[2] = { Ty, VTy };
5265     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5266     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5267     return Builder.CreateTrunc(Ops[0], Int16Ty);
5268   }
5269   case NEON::BI__builtin_neon_vmaxv_u8: {
5270     Int = Intrinsic::aarch64_neon_umaxv;
5271     Ty = Int32Ty;
5272     VTy = llvm::VectorType::get(Int8Ty, 8);
5273     llvm::Type *Tys[2] = { Ty, VTy };
5274     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5275     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5276     return Builder.CreateTrunc(Ops[0], Int8Ty);
5277   }
5278   case NEON::BI__builtin_neon_vmaxv_u16: {
5279     Int = Intrinsic::aarch64_neon_umaxv;
5280     Ty = Int32Ty;
5281     VTy = llvm::VectorType::get(Int16Ty, 4);
5282     llvm::Type *Tys[2] = { Ty, VTy };
5283     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5284     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5285     return Builder.CreateTrunc(Ops[0], Int16Ty);
5286   }
5287   case NEON::BI__builtin_neon_vmaxvq_u8: {
5288     Int = Intrinsic::aarch64_neon_umaxv;
5289     Ty = Int32Ty;
5290     VTy = llvm::VectorType::get(Int8Ty, 16);
5291     llvm::Type *Tys[2] = { Ty, VTy };
5292     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5293     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5294     return Builder.CreateTrunc(Ops[0], Int8Ty);
5295   }
5296   case NEON::BI__builtin_neon_vmaxvq_u16: {
5297     Int = Intrinsic::aarch64_neon_umaxv;
5298     Ty = Int32Ty;
5299     VTy = llvm::VectorType::get(Int16Ty, 8);
5300     llvm::Type *Tys[2] = { Ty, VTy };
5301     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5302     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5303     return Builder.CreateTrunc(Ops[0], Int16Ty);
5304   }
5305   case NEON::BI__builtin_neon_vmaxv_s8: {
5306     Int = Intrinsic::aarch64_neon_smaxv;
5307     Ty = Int32Ty;
5308     VTy = llvm::VectorType::get(Int8Ty, 8);
5309     llvm::Type *Tys[2] = { Ty, VTy };
5310     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5311     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5312     return Builder.CreateTrunc(Ops[0], Int8Ty);
5313   }
5314   case NEON::BI__builtin_neon_vmaxv_s16: {
5315     Int = Intrinsic::aarch64_neon_smaxv;
5316     Ty = Int32Ty;
5317     VTy = llvm::VectorType::get(Int16Ty, 4);
5318     llvm::Type *Tys[2] = { Ty, VTy };
5319     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5320     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5321     return Builder.CreateTrunc(Ops[0], Int16Ty);
5322   }
5323   case NEON::BI__builtin_neon_vmaxvq_s8: {
5324     Int = Intrinsic::aarch64_neon_smaxv;
5325     Ty = Int32Ty;
5326     VTy = llvm::VectorType::get(Int8Ty, 16);
5327     llvm::Type *Tys[2] = { Ty, VTy };
5328     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5329     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5330     return Builder.CreateTrunc(Ops[0], Int8Ty);
5331   }
5332   case NEON::BI__builtin_neon_vmaxvq_s16: {
5333     Int = Intrinsic::aarch64_neon_smaxv;
5334     Ty = Int32Ty;
5335     VTy = llvm::VectorType::get(Int16Ty, 8);
5336     llvm::Type *Tys[2] = { Ty, VTy };
5337     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5338     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5339     return Builder.CreateTrunc(Ops[0], Int16Ty);
5340   }
5341   case NEON::BI__builtin_neon_vminv_u8: {
5342     Int = Intrinsic::aarch64_neon_uminv;
5343     Ty = Int32Ty;
5344     VTy = llvm::VectorType::get(Int8Ty, 8);
5345     llvm::Type *Tys[2] = { Ty, VTy };
5346     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5347     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5348     return Builder.CreateTrunc(Ops[0], Int8Ty);
5349   }
5350   case NEON::BI__builtin_neon_vminv_u16: {
5351     Int = Intrinsic::aarch64_neon_uminv;
5352     Ty = Int32Ty;
5353     VTy = llvm::VectorType::get(Int16Ty, 4);
5354     llvm::Type *Tys[2] = { Ty, VTy };
5355     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5356     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5357     return Builder.CreateTrunc(Ops[0], Int16Ty);
5358   }
5359   case NEON::BI__builtin_neon_vminvq_u8: {
5360     Int = Intrinsic::aarch64_neon_uminv;
5361     Ty = Int32Ty;
5362     VTy = llvm::VectorType::get(Int8Ty, 16);
5363     llvm::Type *Tys[2] = { Ty, VTy };
5364     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5365     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5366     return Builder.CreateTrunc(Ops[0], Int8Ty);
5367   }
5368   case NEON::BI__builtin_neon_vminvq_u16: {
5369     Int = Intrinsic::aarch64_neon_uminv;
5370     Ty = Int32Ty;
5371     VTy = llvm::VectorType::get(Int16Ty, 8);
5372     llvm::Type *Tys[2] = { Ty, VTy };
5373     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5374     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5375     return Builder.CreateTrunc(Ops[0], Int16Ty);
5376   }
5377   case NEON::BI__builtin_neon_vminv_s8: {
5378     Int = Intrinsic::aarch64_neon_sminv;
5379     Ty = Int32Ty;
5380     VTy = llvm::VectorType::get(Int8Ty, 8);
5381     llvm::Type *Tys[2] = { Ty, VTy };
5382     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5383     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5384     return Builder.CreateTrunc(Ops[0], Int8Ty);
5385   }
5386   case NEON::BI__builtin_neon_vminv_s16: {
5387     Int = Intrinsic::aarch64_neon_sminv;
5388     Ty = Int32Ty;
5389     VTy = llvm::VectorType::get(Int16Ty, 4);
5390     llvm::Type *Tys[2] = { Ty, VTy };
5391     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5392     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5393     return Builder.CreateTrunc(Ops[0], Int16Ty);
5394   }
5395   case NEON::BI__builtin_neon_vminvq_s8: {
5396     Int = Intrinsic::aarch64_neon_sminv;
5397     Ty = Int32Ty;
5398     VTy = llvm::VectorType::get(Int8Ty, 16);
5399     llvm::Type *Tys[2] = { Ty, VTy };
5400     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5401     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5402     return Builder.CreateTrunc(Ops[0], Int8Ty);
5403   }
5404   case NEON::BI__builtin_neon_vminvq_s16: {
5405     Int = Intrinsic::aarch64_neon_sminv;
5406     Ty = Int32Ty;
5407     VTy = llvm::VectorType::get(Int16Ty, 8);
5408     llvm::Type *Tys[2] = { Ty, VTy };
5409     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5410     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5411     return Builder.CreateTrunc(Ops[0], Int16Ty);
5412   }
5413   case NEON::BI__builtin_neon_vmul_n_f64: {
5414     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5415     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
5416     return Builder.CreateFMul(Ops[0], RHS);
5417   }
5418   case NEON::BI__builtin_neon_vaddlv_u8: {
5419     Int = Intrinsic::aarch64_neon_uaddlv;
5420     Ty = Int32Ty;
5421     VTy = llvm::VectorType::get(Int8Ty, 8);
5422     llvm::Type *Tys[2] = { Ty, VTy };
5423     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5424     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5425     return Builder.CreateTrunc(Ops[0], Int16Ty);
5426   }
5427   case NEON::BI__builtin_neon_vaddlv_u16: {
5428     Int = Intrinsic::aarch64_neon_uaddlv;
5429     Ty = Int32Ty;
5430     VTy = llvm::VectorType::get(Int16Ty, 4);
5431     llvm::Type *Tys[2] = { Ty, VTy };
5432     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5433     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5434   }
5435   case NEON::BI__builtin_neon_vaddlvq_u8: {
5436     Int = Intrinsic::aarch64_neon_uaddlv;
5437     Ty = Int32Ty;
5438     VTy = llvm::VectorType::get(Int8Ty, 16);
5439     llvm::Type *Tys[2] = { Ty, VTy };
5440     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5441     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5442     return Builder.CreateTrunc(Ops[0], Int16Ty);
5443   }
5444   case NEON::BI__builtin_neon_vaddlvq_u16: {
5445     Int = Intrinsic::aarch64_neon_uaddlv;
5446     Ty = Int32Ty;
5447     VTy = llvm::VectorType::get(Int16Ty, 8);
5448     llvm::Type *Tys[2] = { Ty, VTy };
5449     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5450     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5451   }
5452   case NEON::BI__builtin_neon_vaddlv_s8: {
5453     Int = Intrinsic::aarch64_neon_saddlv;
5454     Ty = Int32Ty;
5455     VTy = llvm::VectorType::get(Int8Ty, 8);
5456     llvm::Type *Tys[2] = { Ty, VTy };
5457     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5458     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5459     return Builder.CreateTrunc(Ops[0], Int16Ty);
5460   }
5461   case NEON::BI__builtin_neon_vaddlv_s16: {
5462     Int = Intrinsic::aarch64_neon_saddlv;
5463     Ty = Int32Ty;
5464     VTy = llvm::VectorType::get(Int16Ty, 4);
5465     llvm::Type *Tys[2] = { Ty, VTy };
5466     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5467     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5468   }
5469   case NEON::BI__builtin_neon_vaddlvq_s8: {
5470     Int = Intrinsic::aarch64_neon_saddlv;
5471     Ty = Int32Ty;
5472     VTy = llvm::VectorType::get(Int8Ty, 16);
5473     llvm::Type *Tys[2] = { Ty, VTy };
5474     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5475     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5476     return Builder.CreateTrunc(Ops[0], Int16Ty);
5477   }
5478   case NEON::BI__builtin_neon_vaddlvq_s16: {
5479     Int = Intrinsic::aarch64_neon_saddlv;
5480     Ty = Int32Ty;
5481     VTy = llvm::VectorType::get(Int16Ty, 8);
5482     llvm::Type *Tys[2] = { Ty, VTy };
5483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5484     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5485   }
5486   case NEON::BI__builtin_neon_vsri_n_v:
5487   case NEON::BI__builtin_neon_vsriq_n_v: {
5488     Int = Intrinsic::aarch64_neon_vsri;
5489     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5490     return EmitNeonCall(Intrin, Ops, "vsri_n");
5491   }
5492   case NEON::BI__builtin_neon_vsli_n_v:
5493   case NEON::BI__builtin_neon_vsliq_n_v: {
5494     Int = Intrinsic::aarch64_neon_vsli;
5495     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5496     return EmitNeonCall(Intrin, Ops, "vsli_n");
5497   }
5498   case NEON::BI__builtin_neon_vsra_n_v:
5499   case NEON::BI__builtin_neon_vsraq_n_v:
5500     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5501     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5502     return Builder.CreateAdd(Ops[0], Ops[1]);
5503   case NEON::BI__builtin_neon_vrsra_n_v:
5504   case NEON::BI__builtin_neon_vrsraq_n_v: {
5505     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
5506     SmallVector<llvm::Value*,2> TmpOps;
5507     TmpOps.push_back(Ops[1]);
5508     TmpOps.push_back(Ops[2]);
5509     Function* F = CGM.getIntrinsic(Int, Ty);
5510     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
5511     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
5512     return Builder.CreateAdd(Ops[0], tmp);
5513   }
5514     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
5515     // of an Align parameter here.
5516   case NEON::BI__builtin_neon_vld1_x2_v:
5517   case NEON::BI__builtin_neon_vld1q_x2_v:
5518   case NEON::BI__builtin_neon_vld1_x3_v:
5519   case NEON::BI__builtin_neon_vld1q_x3_v:
5520   case NEON::BI__builtin_neon_vld1_x4_v:
5521   case NEON::BI__builtin_neon_vld1q_x4_v: {
5522     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5523     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5524     llvm::Type *Tys[2] = { VTy, PTy };
5525     unsigned Int;
5526     switch (BuiltinID) {
5527     case NEON::BI__builtin_neon_vld1_x2_v:
5528     case NEON::BI__builtin_neon_vld1q_x2_v:
5529       Int = Intrinsic::aarch64_neon_ld1x2;
5530       break;
5531     case NEON::BI__builtin_neon_vld1_x3_v:
5532     case NEON::BI__builtin_neon_vld1q_x3_v:
5533       Int = Intrinsic::aarch64_neon_ld1x3;
5534       break;
5535     case NEON::BI__builtin_neon_vld1_x4_v:
5536     case NEON::BI__builtin_neon_vld1q_x4_v:
5537       Int = Intrinsic::aarch64_neon_ld1x4;
5538       break;
5539     }
5540     Function *F = CGM.getIntrinsic(Int, Tys);
5541     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5542     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5543     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5544     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5545   }
5546   case NEON::BI__builtin_neon_vst1_x2_v:
5547   case NEON::BI__builtin_neon_vst1q_x2_v:
5548   case NEON::BI__builtin_neon_vst1_x3_v:
5549   case NEON::BI__builtin_neon_vst1q_x3_v:
5550   case NEON::BI__builtin_neon_vst1_x4_v:
5551   case NEON::BI__builtin_neon_vst1q_x4_v: {
5552     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5553     llvm::Type *Tys[2] = { VTy, PTy };
5554     unsigned Int;
5555     switch (BuiltinID) {
5556     case NEON::BI__builtin_neon_vst1_x2_v:
5557     case NEON::BI__builtin_neon_vst1q_x2_v:
5558       Int = Intrinsic::aarch64_neon_st1x2;
5559       break;
5560     case NEON::BI__builtin_neon_vst1_x3_v:
5561     case NEON::BI__builtin_neon_vst1q_x3_v:
5562       Int = Intrinsic::aarch64_neon_st1x3;
5563       break;
5564     case NEON::BI__builtin_neon_vst1_x4_v:
5565     case NEON::BI__builtin_neon_vst1q_x4_v:
5566       Int = Intrinsic::aarch64_neon_st1x4;
5567       break;
5568     }
5569     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5570     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5571   }
5572   case NEON::BI__builtin_neon_vld1_v:
5573   case NEON::BI__builtin_neon_vld1q_v:
5574     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5575     return Builder.CreateDefaultAlignedLoad(Ops[0]);
5576   case NEON::BI__builtin_neon_vst1_v:
5577   case NEON::BI__builtin_neon_vst1q_v:
5578     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5579     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5580     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5581   case NEON::BI__builtin_neon_vld1_lane_v:
5582   case NEON::BI__builtin_neon_vld1q_lane_v:
5583     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5584     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5585     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5586     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5587     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
5588   case NEON::BI__builtin_neon_vld1_dup_v:
5589   case NEON::BI__builtin_neon_vld1q_dup_v: {
5590     Value *V = UndefValue::get(Ty);
5591     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5592     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5593     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5594     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5595     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
5596     return EmitNeonSplat(Ops[0], CI);
5597   }
5598   case NEON::BI__builtin_neon_vst1_lane_v:
5599   case NEON::BI__builtin_neon_vst1q_lane_v:
5600     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5601     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5602     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5603     return Builder.CreateDefaultAlignedStore(Ops[1],
5604                                              Builder.CreateBitCast(Ops[0], Ty));
5605   case NEON::BI__builtin_neon_vld2_v:
5606   case NEON::BI__builtin_neon_vld2q_v: {
5607     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5608     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5609     llvm::Type *Tys[2] = { VTy, PTy };
5610     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
5611     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5612     Ops[0] = Builder.CreateBitCast(Ops[0],
5613                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5614     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5615   }
5616   case NEON::BI__builtin_neon_vld3_v:
5617   case NEON::BI__builtin_neon_vld3q_v: {
5618     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5619     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5620     llvm::Type *Tys[2] = { VTy, PTy };
5621     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
5622     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5623     Ops[0] = Builder.CreateBitCast(Ops[0],
5624                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5625     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5626   }
5627   case NEON::BI__builtin_neon_vld4_v:
5628   case NEON::BI__builtin_neon_vld4q_v: {
5629     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5630     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5631     llvm::Type *Tys[2] = { VTy, PTy };
5632     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
5633     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5634     Ops[0] = Builder.CreateBitCast(Ops[0],
5635                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5636     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5637   }
5638   case NEON::BI__builtin_neon_vld2_dup_v:
5639   case NEON::BI__builtin_neon_vld2q_dup_v: {
5640     llvm::Type *PTy =
5641       llvm::PointerType::getUnqual(VTy->getElementType());
5642     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5643     llvm::Type *Tys[2] = { VTy, PTy };
5644     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
5645     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5646     Ops[0] = Builder.CreateBitCast(Ops[0],
5647                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5648     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5649   }
5650   case NEON::BI__builtin_neon_vld3_dup_v:
5651   case NEON::BI__builtin_neon_vld3q_dup_v: {
5652     llvm::Type *PTy =
5653       llvm::PointerType::getUnqual(VTy->getElementType());
5654     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5655     llvm::Type *Tys[2] = { VTy, PTy };
5656     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
5657     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5658     Ops[0] = Builder.CreateBitCast(Ops[0],
5659                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5660     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5661   }
5662   case NEON::BI__builtin_neon_vld4_dup_v:
5663   case NEON::BI__builtin_neon_vld4q_dup_v: {
5664     llvm::Type *PTy =
5665       llvm::PointerType::getUnqual(VTy->getElementType());
5666     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5667     llvm::Type *Tys[2] = { VTy, PTy };
5668     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
5669     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5670     Ops[0] = Builder.CreateBitCast(Ops[0],
5671                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5672     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5673   }
5674   case NEON::BI__builtin_neon_vld2_lane_v:
5675   case NEON::BI__builtin_neon_vld2q_lane_v: {
5676     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5677     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
5678     Ops.push_back(Ops[1]);
5679     Ops.erase(Ops.begin()+1);
5680     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5681     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5682     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5683     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
5684     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5685     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5686     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5687   }
5688   case NEON::BI__builtin_neon_vld3_lane_v:
5689   case NEON::BI__builtin_neon_vld3q_lane_v: {
5690     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5691     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
5692     Ops.push_back(Ops[1]);
5693     Ops.erase(Ops.begin()+1);
5694     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5695     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5696     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5697     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5698     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
5699     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5700     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5701     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5702   }
5703   case NEON::BI__builtin_neon_vld4_lane_v:
5704   case NEON::BI__builtin_neon_vld4q_lane_v: {
5705     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5706     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
5707     Ops.push_back(Ops[1]);
5708     Ops.erase(Ops.begin()+1);
5709     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5710     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5711     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5712     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
5713     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
5714     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
5715     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5716     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5717     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5718   }
5719   case NEON::BI__builtin_neon_vst2_v:
5720   case NEON::BI__builtin_neon_vst2q_v: {
5721     Ops.push_back(Ops[0]);
5722     Ops.erase(Ops.begin());
5723     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
5724     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
5725                         Ops, "");
5726   }
5727   case NEON::BI__builtin_neon_vst2_lane_v:
5728   case NEON::BI__builtin_neon_vst2q_lane_v: {
5729     Ops.push_back(Ops[0]);
5730     Ops.erase(Ops.begin());
5731     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
5732     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5733     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
5734                         Ops, "");
5735   }
5736   case NEON::BI__builtin_neon_vst3_v:
5737   case NEON::BI__builtin_neon_vst3q_v: {
5738     Ops.push_back(Ops[0]);
5739     Ops.erase(Ops.begin());
5740     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5741     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
5742                         Ops, "");
5743   }
5744   case NEON::BI__builtin_neon_vst3_lane_v:
5745   case NEON::BI__builtin_neon_vst3q_lane_v: {
5746     Ops.push_back(Ops[0]);
5747     Ops.erase(Ops.begin());
5748     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5749     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5750     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
5751                         Ops, "");
5752   }
5753   case NEON::BI__builtin_neon_vst4_v:
5754   case NEON::BI__builtin_neon_vst4q_v: {
5755     Ops.push_back(Ops[0]);
5756     Ops.erase(Ops.begin());
5757     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5758     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
5759                         Ops, "");
5760   }
5761   case NEON::BI__builtin_neon_vst4_lane_v:
5762   case NEON::BI__builtin_neon_vst4q_lane_v: {
5763     Ops.push_back(Ops[0]);
5764     Ops.erase(Ops.begin());
5765     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5766     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
5767     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
5768                         Ops, "");
5769   }
5770   case NEON::BI__builtin_neon_vtrn_v:
5771   case NEON::BI__builtin_neon_vtrnq_v: {
5772     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5773     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5774     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5775     Value *SV = nullptr;
5776 
5777     for (unsigned vi = 0; vi != 2; ++vi) {
5778       SmallVector<Constant*, 16> Indices;
5779       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5780         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
5781         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
5782       }
5783       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5784       SV = llvm::ConstantVector::get(Indices);
5785       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
5786       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5787     }
5788     return SV;
5789   }
5790   case NEON::BI__builtin_neon_vuzp_v:
5791   case NEON::BI__builtin_neon_vuzpq_v: {
5792     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5793     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5794     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5795     Value *SV = nullptr;
5796 
5797     for (unsigned vi = 0; vi != 2; ++vi) {
5798       SmallVector<Constant*, 16> Indices;
5799       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5800         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
5801 
5802       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5803       SV = llvm::ConstantVector::get(Indices);
5804       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
5805       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5806     }
5807     return SV;
5808   }
5809   case NEON::BI__builtin_neon_vzip_v:
5810   case NEON::BI__builtin_neon_vzipq_v: {
5811     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5812     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5813     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5814     Value *SV = nullptr;
5815 
5816     for (unsigned vi = 0; vi != 2; ++vi) {
5817       SmallVector<Constant*, 16> Indices;
5818       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5819         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
5820         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
5821       }
5822       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5823       SV = llvm::ConstantVector::get(Indices);
5824       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
5825       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5826     }
5827     return SV;
5828   }
5829   case NEON::BI__builtin_neon_vqtbl1q_v: {
5830     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
5831                         Ops, "vtbl1");
5832   }
5833   case NEON::BI__builtin_neon_vqtbl2q_v: {
5834     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
5835                         Ops, "vtbl2");
5836   }
5837   case NEON::BI__builtin_neon_vqtbl3q_v: {
5838     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
5839                         Ops, "vtbl3");
5840   }
5841   case NEON::BI__builtin_neon_vqtbl4q_v: {
5842     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
5843                         Ops, "vtbl4");
5844   }
5845   case NEON::BI__builtin_neon_vqtbx1q_v: {
5846     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
5847                         Ops, "vtbx1");
5848   }
5849   case NEON::BI__builtin_neon_vqtbx2q_v: {
5850     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
5851                         Ops, "vtbx2");
5852   }
5853   case NEON::BI__builtin_neon_vqtbx3q_v: {
5854     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
5855                         Ops, "vtbx3");
5856   }
5857   case NEON::BI__builtin_neon_vqtbx4q_v: {
5858     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
5859                         Ops, "vtbx4");
5860   }
5861   case NEON::BI__builtin_neon_vsqadd_v:
5862   case NEON::BI__builtin_neon_vsqaddq_v: {
5863     Int = Intrinsic::aarch64_neon_usqadd;
5864     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
5865   }
5866   case NEON::BI__builtin_neon_vuqadd_v:
5867   case NEON::BI__builtin_neon_vuqaddq_v: {
5868     Int = Intrinsic::aarch64_neon_suqadd;
5869     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
5870   }
5871   }
5872 }
5873 
5874 llvm::Value *CodeGenFunction::
5875 BuildVector(ArrayRef<llvm::Value*> Ops) {
5876   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
5877          "Not a power-of-two sized vector!");
5878   bool AllConstants = true;
5879   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
5880     AllConstants &= isa<Constant>(Ops[i]);
5881 
5882   // If this is a constant vector, create a ConstantVector.
5883   if (AllConstants) {
5884     SmallVector<llvm::Constant*, 16> CstOps;
5885     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
5886       CstOps.push_back(cast<Constant>(Ops[i]));
5887     return llvm::ConstantVector::get(CstOps);
5888   }
5889 
5890   // Otherwise, insertelement the values to build the vector.
5891   Value *Result =
5892     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
5893 
5894   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
5895     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
5896 
5897   return Result;
5898 }
5899 
5900 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
5901                                            const CallExpr *E) {
5902   if (BuiltinID == X86::BI__builtin_ms_va_start ||
5903       BuiltinID == X86::BI__builtin_ms_va_end)
5904     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5905                           BuiltinID == X86::BI__builtin_ms_va_start);
5906   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
5907     // Lower this manually. We can't reliably determine whether or not any
5908     // given va_copy() is for a Win64 va_list from the calling convention
5909     // alone, because it's legal to do this from a System V ABI function.
5910     // With opaque pointer types, we won't have enough information in LLVM
5911     // IR to determine this from the argument types, either. Best to do it
5912     // now, while we have enough information.
5913     Address DestAddr = EmitMSVAListRef(E->getArg(0));
5914     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5915 
5916     llvm::Type *BPP = Int8PtrPtrTy;
5917 
5918     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
5919                        DestAddr.getAlignment());
5920     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
5921                       SrcAddr.getAlignment());
5922 
5923     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5924     return Builder.CreateStore(ArgPtr, DestAddr);
5925   }
5926 
5927   SmallVector<Value*, 4> Ops;
5928 
5929   // Find out if any arguments are required to be integer constant expressions.
5930   unsigned ICEArguments = 0;
5931   ASTContext::GetBuiltinTypeError Error;
5932   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5933   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5934 
5935   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
5936     // If this is a normal argument, just emit it as a scalar.
5937     if ((ICEArguments & (1 << i)) == 0) {
5938       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5939       continue;
5940     }
5941 
5942     // If this is required to be a constant, constant fold it so that we know
5943     // that the generated intrinsic gets a ConstantInt.
5944     llvm::APSInt Result;
5945     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5946     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5947     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5948   }
5949 
5950   switch (BuiltinID) {
5951   default: return nullptr;
5952   case X86::BI__builtin_cpu_supports: {
5953     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
5954     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
5955 
5956     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
5957     // based mapping.
5958     // Processor features and mapping to processor feature value.
5959     enum X86Features {
5960       CMOV = 0,
5961       MMX,
5962       POPCNT,
5963       SSE,
5964       SSE2,
5965       SSE3,
5966       SSSE3,
5967       SSE4_1,
5968       SSE4_2,
5969       AVX,
5970       AVX2,
5971       SSE4_A,
5972       FMA4,
5973       XOP,
5974       FMA,
5975       AVX512F,
5976       BMI,
5977       BMI2,
5978       MAX
5979     };
5980 
5981     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
5982                               .Case("cmov", X86Features::CMOV)
5983                               .Case("mmx", X86Features::MMX)
5984                               .Case("popcnt", X86Features::POPCNT)
5985                               .Case("sse", X86Features::SSE)
5986                               .Case("sse2", X86Features::SSE2)
5987                               .Case("sse3", X86Features::SSE3)
5988                               .Case("sse4.1", X86Features::SSE4_1)
5989                               .Case("sse4.2", X86Features::SSE4_2)
5990                               .Case("avx", X86Features::AVX)
5991                               .Case("avx2", X86Features::AVX2)
5992                               .Case("sse4a", X86Features::SSE4_A)
5993                               .Case("fma4", X86Features::FMA4)
5994                               .Case("xop", X86Features::XOP)
5995                               .Case("fma", X86Features::FMA)
5996                               .Case("avx512f", X86Features::AVX512F)
5997                               .Case("bmi", X86Features::BMI)
5998                               .Case("bmi2", X86Features::BMI2)
5999                               .Default(X86Features::MAX);
6000     assert(Feature != X86Features::MAX && "Invalid feature!");
6001 
6002     // Matching the struct layout from the compiler-rt/libgcc structure that is
6003     // filled in:
6004     // unsigned int __cpu_vendor;
6005     // unsigned int __cpu_type;
6006     // unsigned int __cpu_subtype;
6007     // unsigned int __cpu_features[1];
6008     llvm::Type *STy = llvm::StructType::get(
6009         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6010 
6011     // Grab the global __cpu_model.
6012     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6013 
6014     // Grab the first (0th) element from the field __cpu_features off of the
6015     // global in the struct STy.
6016     Value *Idxs[] = {
6017       ConstantInt::get(Int32Ty, 0),
6018       ConstantInt::get(Int32Ty, 3),
6019       ConstantInt::get(Int32Ty, 0)
6020     };
6021     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6022     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6023                                                 CharUnits::fromQuantity(4));
6024 
6025     // Check the value of the bit corresponding to the feature requested.
6026     Value *Bitset = Builder.CreateAnd(
6027         Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
6028     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6029   }
6030   case X86::BI_mm_prefetch: {
6031     Value *Address = Ops[0];
6032     Value *RW = ConstantInt::get(Int32Ty, 0);
6033     Value *Locality = Ops[1];
6034     Value *Data = ConstantInt::get(Int32Ty, 1);
6035     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6036     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6037   }
6038   case X86::BI__builtin_ia32_undef128:
6039   case X86::BI__builtin_ia32_undef256:
6040   case X86::BI__builtin_ia32_undef512:
6041     return UndefValue::get(ConvertType(E->getType()));
6042   case X86::BI__builtin_ia32_vec_init_v8qi:
6043   case X86::BI__builtin_ia32_vec_init_v4hi:
6044   case X86::BI__builtin_ia32_vec_init_v2si:
6045     return Builder.CreateBitCast(BuildVector(Ops),
6046                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6047   case X86::BI__builtin_ia32_vec_ext_v2si:
6048     return Builder.CreateExtractElement(Ops[0],
6049                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6050   case X86::BI__builtin_ia32_ldmxcsr: {
6051     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6052     Builder.CreateStore(Ops[0], Tmp);
6053     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6054                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6055   }
6056   case X86::BI__builtin_ia32_stmxcsr: {
6057     Address Tmp = CreateMemTemp(E->getType());
6058     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6059                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6060     return Builder.CreateLoad(Tmp, "stmxcsr");
6061   }
6062   case X86::BI__builtin_ia32_storehps:
6063   case X86::BI__builtin_ia32_storelps: {
6064     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
6065     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
6066 
6067     // cast val v2i64
6068     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
6069 
6070     // extract (0, 1)
6071     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
6072     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
6073     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
6074 
6075     // cast pointer to i64 & store
6076     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
6077     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6078   }
6079   case X86::BI__builtin_ia32_palignr128:
6080   case X86::BI__builtin_ia32_palignr256: {
6081     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
6082 
6083     unsigned NumElts =
6084       cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
6085     assert(NumElts % 16 == 0);
6086     unsigned NumLanes = NumElts / 16;
6087     unsigned NumLaneElts = NumElts / NumLanes;
6088 
6089     // If palignr is shifting the pair of vectors more than the size of two
6090     // lanes, emit zero.
6091     if (ShiftVal >= (2 * NumLaneElts))
6092       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6093 
6094     // If palignr is shifting the pair of input vectors more than one lane,
6095     // but less than two lanes, convert to shifting in zeroes.
6096     if (ShiftVal > NumLaneElts) {
6097       ShiftVal -= NumLaneElts;
6098       Ops[1] = Ops[0];
6099       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
6100     }
6101 
6102     uint32_t Indices[32];
6103     // 256-bit palignr operates on 128-bit lanes so we need to handle that
6104     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
6105       for (unsigned i = 0; i != NumLaneElts; ++i) {
6106         unsigned Idx = ShiftVal + i;
6107         if (Idx >= NumLaneElts)
6108           Idx += NumElts - NumLaneElts; // End of lane, switch operand.
6109         Indices[l + i] = Idx + l;
6110       }
6111     }
6112 
6113     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
6114                                               makeArrayRef(Indices, NumElts));
6115     return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
6116   }
6117   case X86::BI__builtin_ia32_pslldqi256: {
6118     // Shift value is in bits so divide by 8.
6119     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6120 
6121     // If pslldq is shifting the vector more than 15 bytes, emit zero.
6122     if (shiftVal >= 16)
6123       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6124 
6125     uint32_t Indices[32];
6126     // 256-bit pslldq operates on 128-bit lanes so we need to handle that
6127     for (unsigned l = 0; l != 32; l += 16) {
6128       for (unsigned i = 0; i != 16; ++i) {
6129         unsigned Idx = 32 + i - shiftVal;
6130         if (Idx < 32) Idx -= 16; // end of lane, switch operand.
6131         Indices[l + i] = Idx + l;
6132       }
6133     }
6134 
6135     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6136     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6137     Value *Zero = llvm::Constant::getNullValue(VecTy);
6138 
6139     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6140     SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
6141     llvm::Type *ResultType = ConvertType(E->getType());
6142     return Builder.CreateBitCast(SV, ResultType, "cast");
6143   }
6144   case X86::BI__builtin_ia32_psrldqi256: {
6145     // Shift value is in bits so divide by 8.
6146     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6147 
6148     // If psrldq is shifting the vector more than 15 bytes, emit zero.
6149     if (shiftVal >= 16)
6150       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6151 
6152     uint32_t Indices[32];
6153     // 256-bit psrldq operates on 128-bit lanes so we need to handle that
6154     for (unsigned l = 0; l != 32; l += 16) {
6155       for (unsigned i = 0; i != 16; ++i) {
6156         unsigned Idx = i + shiftVal;
6157         if (Idx >= 16) Idx += 16; // end of lane, switch operand.
6158         Indices[l + i] = Idx + l;
6159       }
6160     }
6161 
6162     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6163     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6164     Value *Zero = llvm::Constant::getNullValue(VecTy);
6165 
6166     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6167     SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
6168     llvm::Type *ResultType = ConvertType(E->getType());
6169     return Builder.CreateBitCast(SV, ResultType, "cast");
6170   }
6171   case X86::BI__builtin_ia32_movntps:
6172   case X86::BI__builtin_ia32_movntps256:
6173   case X86::BI__builtin_ia32_movntpd:
6174   case X86::BI__builtin_ia32_movntpd256:
6175   case X86::BI__builtin_ia32_movntdq:
6176   case X86::BI__builtin_ia32_movntdq256:
6177   case X86::BI__builtin_ia32_movnti:
6178   case X86::BI__builtin_ia32_movnti64: {
6179     llvm::MDNode *Node = llvm::MDNode::get(
6180         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
6181 
6182     // Convert the type of the pointer to a pointer to the stored type.
6183     Value *BC = Builder.CreateBitCast(Ops[0],
6184                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
6185                                       "cast");
6186     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
6187     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
6188 
6189     // If the operand is an integer, we can't assume alignment. Otherwise,
6190     // assume natural alignment.
6191     QualType ArgTy = E->getArg(1)->getType();
6192     unsigned Align;
6193     if (ArgTy->isIntegerType())
6194       Align = 1;
6195     else
6196       Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
6197     SI->setAlignment(Align);
6198     return SI;
6199   }
6200   // 3DNow!
6201   case X86::BI__builtin_ia32_pswapdsf:
6202   case X86::BI__builtin_ia32_pswapdsi: {
6203     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
6204     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
6205     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
6206     return Builder.CreateCall(F, Ops, "pswapd");
6207   }
6208   case X86::BI__builtin_ia32_rdrand16_step:
6209   case X86::BI__builtin_ia32_rdrand32_step:
6210   case X86::BI__builtin_ia32_rdrand64_step:
6211   case X86::BI__builtin_ia32_rdseed16_step:
6212   case X86::BI__builtin_ia32_rdseed32_step:
6213   case X86::BI__builtin_ia32_rdseed64_step: {
6214     Intrinsic::ID ID;
6215     switch (BuiltinID) {
6216     default: llvm_unreachable("Unsupported intrinsic!");
6217     case X86::BI__builtin_ia32_rdrand16_step:
6218       ID = Intrinsic::x86_rdrand_16;
6219       break;
6220     case X86::BI__builtin_ia32_rdrand32_step:
6221       ID = Intrinsic::x86_rdrand_32;
6222       break;
6223     case X86::BI__builtin_ia32_rdrand64_step:
6224       ID = Intrinsic::x86_rdrand_64;
6225       break;
6226     case X86::BI__builtin_ia32_rdseed16_step:
6227       ID = Intrinsic::x86_rdseed_16;
6228       break;
6229     case X86::BI__builtin_ia32_rdseed32_step:
6230       ID = Intrinsic::x86_rdseed_32;
6231       break;
6232     case X86::BI__builtin_ia32_rdseed64_step:
6233       ID = Intrinsic::x86_rdseed_64;
6234       break;
6235     }
6236 
6237     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
6238     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
6239                                       Ops[0]);
6240     return Builder.CreateExtractValue(Call, 1);
6241   }
  // SSE comparison intrinsics
6243   case X86::BI__builtin_ia32_cmpeqps:
6244   case X86::BI__builtin_ia32_cmpltps:
6245   case X86::BI__builtin_ia32_cmpleps:
6246   case X86::BI__builtin_ia32_cmpunordps:
6247   case X86::BI__builtin_ia32_cmpneqps:
6248   case X86::BI__builtin_ia32_cmpnltps:
6249   case X86::BI__builtin_ia32_cmpnleps:
6250   case X86::BI__builtin_ia32_cmpordps:
6251   case X86::BI__builtin_ia32_cmpeqss:
6252   case X86::BI__builtin_ia32_cmpltss:
6253   case X86::BI__builtin_ia32_cmpless:
6254   case X86::BI__builtin_ia32_cmpunordss:
6255   case X86::BI__builtin_ia32_cmpneqss:
6256   case X86::BI__builtin_ia32_cmpnltss:
6257   case X86::BI__builtin_ia32_cmpnless:
6258   case X86::BI__builtin_ia32_cmpordss:
6259   case X86::BI__builtin_ia32_cmpeqpd:
6260   case X86::BI__builtin_ia32_cmpltpd:
6261   case X86::BI__builtin_ia32_cmplepd:
6262   case X86::BI__builtin_ia32_cmpunordpd:
6263   case X86::BI__builtin_ia32_cmpneqpd:
6264   case X86::BI__builtin_ia32_cmpnltpd:
6265   case X86::BI__builtin_ia32_cmpnlepd:
6266   case X86::BI__builtin_ia32_cmpordpd:
6267   case X86::BI__builtin_ia32_cmpeqsd:
6268   case X86::BI__builtin_ia32_cmpltsd:
6269   case X86::BI__builtin_ia32_cmplesd:
6270   case X86::BI__builtin_ia32_cmpunordsd:
6271   case X86::BI__builtin_ia32_cmpneqsd:
6272   case X86::BI__builtin_ia32_cmpnltsd:
6273   case X86::BI__builtin_ia32_cmpnlesd:
6274   case X86::BI__builtin_ia32_cmpordsd:
6275     // These exist so that the builtin that takes an immediate can be bounds
6276     // checked by clang to avoid passing bad immediates to the backend. Since
6277     // AVX has a larger immediate than SSE we would need separate builtins to
6278     // do the different bounds checking. Rather than create a clang specific
6279     // SSE only builtin, this implements eight separate builtins to match gcc
6280     // implementation.
6281 
6282     // Choose the immediate.
6283     unsigned Imm;
6284     switch (BuiltinID) {
6285     default: llvm_unreachable("Unsupported intrinsic!");
6286     case X86::BI__builtin_ia32_cmpeqps:
6287     case X86::BI__builtin_ia32_cmpeqss:
6288     case X86::BI__builtin_ia32_cmpeqpd:
6289     case X86::BI__builtin_ia32_cmpeqsd:
6290       Imm = 0;
6291       break;
6292     case X86::BI__builtin_ia32_cmpltps:
6293     case X86::BI__builtin_ia32_cmpltss:
6294     case X86::BI__builtin_ia32_cmpltpd:
6295     case X86::BI__builtin_ia32_cmpltsd:
6296       Imm = 1;
6297       break;
6298     case X86::BI__builtin_ia32_cmpleps:
6299     case X86::BI__builtin_ia32_cmpless:
6300     case X86::BI__builtin_ia32_cmplepd:
6301     case X86::BI__builtin_ia32_cmplesd:
6302       Imm = 2;
6303       break;
6304     case X86::BI__builtin_ia32_cmpunordps:
6305     case X86::BI__builtin_ia32_cmpunordss:
6306     case X86::BI__builtin_ia32_cmpunordpd:
6307     case X86::BI__builtin_ia32_cmpunordsd:
6308       Imm = 3;
6309       break;
6310     case X86::BI__builtin_ia32_cmpneqps:
6311     case X86::BI__builtin_ia32_cmpneqss:
6312     case X86::BI__builtin_ia32_cmpneqpd:
6313     case X86::BI__builtin_ia32_cmpneqsd:
6314       Imm = 4;
6315       break;
6316     case X86::BI__builtin_ia32_cmpnltps:
6317     case X86::BI__builtin_ia32_cmpnltss:
6318     case X86::BI__builtin_ia32_cmpnltpd:
6319     case X86::BI__builtin_ia32_cmpnltsd:
6320       Imm = 5;
6321       break;
6322     case X86::BI__builtin_ia32_cmpnleps:
6323     case X86::BI__builtin_ia32_cmpnless:
6324     case X86::BI__builtin_ia32_cmpnlepd:
6325     case X86::BI__builtin_ia32_cmpnlesd:
6326       Imm = 6;
6327       break;
6328     case X86::BI__builtin_ia32_cmpordps:
6329     case X86::BI__builtin_ia32_cmpordss:
6330     case X86::BI__builtin_ia32_cmpordpd:
6331     case X86::BI__builtin_ia32_cmpordsd:
6332       Imm = 7;
6333       break;
6334     }
6335 
6336     // Choose the intrinsic ID.
6337     const char *name;
6338     Intrinsic::ID ID;
6339     switch (BuiltinID) {
6340     default: llvm_unreachable("Unsupported intrinsic!");
6341     case X86::BI__builtin_ia32_cmpeqps:
6342     case X86::BI__builtin_ia32_cmpltps:
6343     case X86::BI__builtin_ia32_cmpleps:
6344     case X86::BI__builtin_ia32_cmpunordps:
6345     case X86::BI__builtin_ia32_cmpneqps:
6346     case X86::BI__builtin_ia32_cmpnltps:
6347     case X86::BI__builtin_ia32_cmpnleps:
6348     case X86::BI__builtin_ia32_cmpordps:
6349       name = "cmpps";
6350       ID = Intrinsic::x86_sse_cmp_ps;
6351       break;
6352     case X86::BI__builtin_ia32_cmpeqss:
6353     case X86::BI__builtin_ia32_cmpltss:
6354     case X86::BI__builtin_ia32_cmpless:
6355     case X86::BI__builtin_ia32_cmpunordss:
6356     case X86::BI__builtin_ia32_cmpneqss:
6357     case X86::BI__builtin_ia32_cmpnltss:
6358     case X86::BI__builtin_ia32_cmpnless:
6359     case X86::BI__builtin_ia32_cmpordss:
6360       name = "cmpss";
6361       ID = Intrinsic::x86_sse_cmp_ss;
6362       break;
6363     case X86::BI__builtin_ia32_cmpeqpd:
6364     case X86::BI__builtin_ia32_cmpltpd:
6365     case X86::BI__builtin_ia32_cmplepd:
6366     case X86::BI__builtin_ia32_cmpunordpd:
6367     case X86::BI__builtin_ia32_cmpneqpd:
6368     case X86::BI__builtin_ia32_cmpnltpd:
6369     case X86::BI__builtin_ia32_cmpnlepd:
6370     case X86::BI__builtin_ia32_cmpordpd:
6371       name = "cmppd";
6372       ID = Intrinsic::x86_sse2_cmp_pd;
6373       break;
6374     case X86::BI__builtin_ia32_cmpeqsd:
6375     case X86::BI__builtin_ia32_cmpltsd:
6376     case X86::BI__builtin_ia32_cmplesd:
6377     case X86::BI__builtin_ia32_cmpunordsd:
6378     case X86::BI__builtin_ia32_cmpneqsd:
6379     case X86::BI__builtin_ia32_cmpnltsd:
6380     case X86::BI__builtin_ia32_cmpnlesd:
6381     case X86::BI__builtin_ia32_cmpordsd:
6382       name = "cmpsd";
6383       ID = Intrinsic::x86_sse2_cmp_sd;
6384       break;
6385     }
6386 
6387     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6388     llvm::Function *F = CGM.getIntrinsic(ID);
6389     return Builder.CreateCall(F, Ops, name);
6390   }
6391 }
6392 
6393 
6394 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
6395                                            const CallExpr *E) {
6396   SmallVector<Value*, 4> Ops;
6397 
6398   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
6399     Ops.push_back(EmitScalarExpr(E->getArg(i)));
6400 
6401   Intrinsic::ID ID = Intrinsic::not_intrinsic;
6402 
6403   switch (BuiltinID) {
6404   default: return nullptr;
6405 
6406   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
6407   // call __builtin_readcyclecounter.
6408   case PPC::BI__builtin_ppc_get_timebase:
6409     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
6410 
6411   // vec_ld, vec_lvsl, vec_lvsr
6412   case PPC::BI__builtin_altivec_lvx:
6413   case PPC::BI__builtin_altivec_lvxl:
6414   case PPC::BI__builtin_altivec_lvebx:
6415   case PPC::BI__builtin_altivec_lvehx:
6416   case PPC::BI__builtin_altivec_lvewx:
6417   case PPC::BI__builtin_altivec_lvsl:
6418   case PPC::BI__builtin_altivec_lvsr:
6419   case PPC::BI__builtin_vsx_lxvd2x:
6420   case PPC::BI__builtin_vsx_lxvw4x:
6421   {
6422     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
6423 
6424     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
6425     Ops.pop_back();
6426 
6427     switch (BuiltinID) {
6428     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
6429     case PPC::BI__builtin_altivec_lvx:
6430       ID = Intrinsic::ppc_altivec_lvx;
6431       break;
6432     case PPC::BI__builtin_altivec_lvxl:
6433       ID = Intrinsic::ppc_altivec_lvxl;
6434       break;
6435     case PPC::BI__builtin_altivec_lvebx:
6436       ID = Intrinsic::ppc_altivec_lvebx;
6437       break;
6438     case PPC::BI__builtin_altivec_lvehx:
6439       ID = Intrinsic::ppc_altivec_lvehx;
6440       break;
6441     case PPC::BI__builtin_altivec_lvewx:
6442       ID = Intrinsic::ppc_altivec_lvewx;
6443       break;
6444     case PPC::BI__builtin_altivec_lvsl:
6445       ID = Intrinsic::ppc_altivec_lvsl;
6446       break;
6447     case PPC::BI__builtin_altivec_lvsr:
6448       ID = Intrinsic::ppc_altivec_lvsr;
6449       break;
6450     case PPC::BI__builtin_vsx_lxvd2x:
6451       ID = Intrinsic::ppc_vsx_lxvd2x;
6452       break;
6453     case PPC::BI__builtin_vsx_lxvw4x:
6454       ID = Intrinsic::ppc_vsx_lxvw4x;
6455       break;
6456     }
6457     llvm::Function *F = CGM.getIntrinsic(ID);
6458     return Builder.CreateCall(F, Ops, "");
6459   }
6460 
6461   // vec_st
6462   case PPC::BI__builtin_altivec_stvx:
6463   case PPC::BI__builtin_altivec_stvxl:
6464   case PPC::BI__builtin_altivec_stvebx:
6465   case PPC::BI__builtin_altivec_stvehx:
6466   case PPC::BI__builtin_altivec_stvewx:
6467   case PPC::BI__builtin_vsx_stxvd2x:
6468   case PPC::BI__builtin_vsx_stxvw4x:
6469   {
6470     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
6471     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
6472     Ops.pop_back();
6473 
6474     switch (BuiltinID) {
6475     default: llvm_unreachable("Unsupported st intrinsic!");
6476     case PPC::BI__builtin_altivec_stvx:
6477       ID = Intrinsic::ppc_altivec_stvx;
6478       break;
6479     case PPC::BI__builtin_altivec_stvxl:
6480       ID = Intrinsic::ppc_altivec_stvxl;
6481       break;
6482     case PPC::BI__builtin_altivec_stvebx:
6483       ID = Intrinsic::ppc_altivec_stvebx;
6484       break;
6485     case PPC::BI__builtin_altivec_stvehx:
6486       ID = Intrinsic::ppc_altivec_stvehx;
6487       break;
6488     case PPC::BI__builtin_altivec_stvewx:
6489       ID = Intrinsic::ppc_altivec_stvewx;
6490       break;
6491     case PPC::BI__builtin_vsx_stxvd2x:
6492       ID = Intrinsic::ppc_vsx_stxvd2x;
6493       break;
6494     case PPC::BI__builtin_vsx_stxvw4x:
6495       ID = Intrinsic::ppc_vsx_stxvw4x;
6496       break;
6497     }
6498     llvm::Function *F = CGM.getIntrinsic(ID);
6499     return Builder.CreateCall(F, Ops, "");
6500   }
6501   // Square root
6502   case PPC::BI__builtin_vsx_xvsqrtsp:
6503   case PPC::BI__builtin_vsx_xvsqrtdp: {
6504     llvm::Type *ResultType = ConvertType(E->getType());
6505     Value *X = EmitScalarExpr(E->getArg(0));
6506     ID = Intrinsic::sqrt;
6507     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6508     return Builder.CreateCall(F, X);
6509   }
6510   // Count leading zeros
6511   case PPC::BI__builtin_altivec_vclzb:
6512   case PPC::BI__builtin_altivec_vclzh:
6513   case PPC::BI__builtin_altivec_vclzw:
6514   case PPC::BI__builtin_altivec_vclzd: {
6515     llvm::Type *ResultType = ConvertType(E->getType());
6516     Value *X = EmitScalarExpr(E->getArg(0));
6517     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6518     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6519     return Builder.CreateCall(F, {X, Undef});
6520   }
6521   // Copy sign
6522   case PPC::BI__builtin_vsx_xvcpsgnsp:
6523   case PPC::BI__builtin_vsx_xvcpsgndp: {
6524     llvm::Type *ResultType = ConvertType(E->getType());
6525     Value *X = EmitScalarExpr(E->getArg(0));
6526     Value *Y = EmitScalarExpr(E->getArg(1));
6527     ID = Intrinsic::copysign;
6528     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6529     return Builder.CreateCall(F, {X, Y});
6530   }
6531   // Rounding/truncation
6532   case PPC::BI__builtin_vsx_xvrspip:
6533   case PPC::BI__builtin_vsx_xvrdpip:
6534   case PPC::BI__builtin_vsx_xvrdpim:
6535   case PPC::BI__builtin_vsx_xvrspim:
6536   case PPC::BI__builtin_vsx_xvrdpi:
6537   case PPC::BI__builtin_vsx_xvrspi:
6538   case PPC::BI__builtin_vsx_xvrdpic:
6539   case PPC::BI__builtin_vsx_xvrspic:
6540   case PPC::BI__builtin_vsx_xvrdpiz:
6541   case PPC::BI__builtin_vsx_xvrspiz: {
6542     llvm::Type *ResultType = ConvertType(E->getType());
6543     Value *X = EmitScalarExpr(E->getArg(0));
6544     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
6545         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
6546       ID = Intrinsic::floor;
6547     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
6548              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
6549       ID = Intrinsic::round;
6550     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
6551              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
6552       ID = Intrinsic::nearbyint;
6553     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
6554              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
6555       ID = Intrinsic::ceil;
6556     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
6557              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
6558       ID = Intrinsic::trunc;
6559     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6560     return Builder.CreateCall(F, X);
6561   }
6562   // FMA variations
6563   case PPC::BI__builtin_vsx_xvmaddadp:
6564   case PPC::BI__builtin_vsx_xvmaddasp:
6565   case PPC::BI__builtin_vsx_xvnmaddadp:
6566   case PPC::BI__builtin_vsx_xvnmaddasp:
6567   case PPC::BI__builtin_vsx_xvmsubadp:
6568   case PPC::BI__builtin_vsx_xvmsubasp:
6569   case PPC::BI__builtin_vsx_xvnmsubadp:
6570   case PPC::BI__builtin_vsx_xvnmsubasp: {
6571     llvm::Type *ResultType = ConvertType(E->getType());
6572     Value *X = EmitScalarExpr(E->getArg(0));
6573     Value *Y = EmitScalarExpr(E->getArg(1));
6574     Value *Z = EmitScalarExpr(E->getArg(2));
6575     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6576     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6577     switch (BuiltinID) {
6578       case PPC::BI__builtin_vsx_xvmaddadp:
6579       case PPC::BI__builtin_vsx_xvmaddasp:
6580         return Builder.CreateCall(F, {X, Y, Z});
6581       case PPC::BI__builtin_vsx_xvnmaddadp:
6582       case PPC::BI__builtin_vsx_xvnmaddasp:
6583         return Builder.CreateFSub(Zero,
6584                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
6585       case PPC::BI__builtin_vsx_xvmsubadp:
6586       case PPC::BI__builtin_vsx_xvmsubasp:
6587         return Builder.CreateCall(F,
6588                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6589       case PPC::BI__builtin_vsx_xvnmsubadp:
6590       case PPC::BI__builtin_vsx_xvnmsubasp:
6591         Value *FsubRes =
6592           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6593         return Builder.CreateFSub(Zero, FsubRes, "sub");
6594     }
6595     llvm_unreachable("Unknown FMA operation");
6596     return nullptr; // Suppress no-return warning
6597   }
6598   }
6599 }
6600 
6601 // Emit an intrinsic that has 1 float or double.
6602 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
6603                                  const CallExpr *E,
6604                                  unsigned IntrinsicID) {
6605   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6606 
6607   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6608   return CGF.Builder.CreateCall(F, Src0);
6609 }
6610 
6611 // Emit an intrinsic that has 3 float or double operands.
6612 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
6613                                    const CallExpr *E,
6614                                    unsigned IntrinsicID) {
6615   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6616   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6617   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
6618 
6619   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6620   return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
6621 }
6622 
6623 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
6624 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
6625                                const CallExpr *E,
6626                                unsigned IntrinsicID) {
6627   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6628   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6629 
6630   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6631   return CGF.Builder.CreateCall(F, {Src0, Src1});
6632 }
6633 
6634 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
6635                                               const CallExpr *E) {
6636   switch (BuiltinID) {
6637   case AMDGPU::BI__builtin_amdgpu_div_scale:
6638   case AMDGPU::BI__builtin_amdgpu_div_scalef: {
6639     // Translate from the intrinsics's struct return to the builtin's out
6640     // argument.
6641 
6642     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
6643 
6644     llvm::Value *X = EmitScalarExpr(E->getArg(0));
6645     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
6646     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
6647 
6648     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
6649                                            X->getType());
6650 
6651     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
6652 
6653     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
6654     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
6655 
6656     llvm::Type *RealFlagType
6657       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
6658 
6659     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
6660     Builder.CreateStore(FlagExt, FlagOutPtr);
6661     return Result;
6662   }
6663   case AMDGPU::BI__builtin_amdgpu_div_fmas:
6664   case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
6665     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
6666     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
6667     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
6668     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
6669 
6670     llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
6671                                       Src0->getType());
6672     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
6673     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
6674   }
6675   case AMDGPU::BI__builtin_amdgpu_div_fixup:
6676   case AMDGPU::BI__builtin_amdgpu_div_fixupf:
6677     return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
6678   case AMDGPU::BI__builtin_amdgpu_trig_preop:
6679   case AMDGPU::BI__builtin_amdgpu_trig_preopf:
6680     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
6681   case AMDGPU::BI__builtin_amdgpu_rcp:
6682   case AMDGPU::BI__builtin_amdgpu_rcpf:
6683     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
6684   case AMDGPU::BI__builtin_amdgpu_rsq:
6685   case AMDGPU::BI__builtin_amdgpu_rsqf:
6686     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
6687   case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
6688   case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
6689     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
6690   case AMDGPU::BI__builtin_amdgpu_ldexp:
6691   case AMDGPU::BI__builtin_amdgpu_ldexpf:
6692     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
6693   case AMDGPU::BI__builtin_amdgpu_class:
6694   case AMDGPU::BI__builtin_amdgpu_classf:
6695     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
6696    default:
6697     return nullptr;
6698   }
6699 }
6700 
6701 /// Handle a SystemZ function in which the final argument is a pointer
6702 /// to an int that receives the post-instruction CC value.  At the LLVM level
6703 /// this is represented as a function that returns a {result, cc} pair.
6704 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
6705                                          unsigned IntrinsicID,
6706                                          const CallExpr *E) {
6707   unsigned NumArgs = E->getNumArgs() - 1;
6708   SmallVector<Value *, 8> Args(NumArgs);
6709   for (unsigned I = 0; I < NumArgs; ++I)
6710     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
6711   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
6712   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
6713   Value *Call = CGF.Builder.CreateCall(F, Args);
6714   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
6715   CGF.Builder.CreateStore(CC, CCPtr);
6716   return CGF.Builder.CreateExtractValue(Call, 0);
6717 }
6718 
6719 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
6720                                                const CallExpr *E) {
6721   switch (BuiltinID) {
6722   case SystemZ::BI__builtin_tbegin: {
6723     Value *TDB = EmitScalarExpr(E->getArg(0));
6724     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6725     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
6726     return Builder.CreateCall(F, {TDB, Control});
6727   }
6728   case SystemZ::BI__builtin_tbegin_nofloat: {
6729     Value *TDB = EmitScalarExpr(E->getArg(0));
6730     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6731     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
6732     return Builder.CreateCall(F, {TDB, Control});
6733   }
6734   case SystemZ::BI__builtin_tbeginc: {
6735     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
6736     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
6737     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
6738     return Builder.CreateCall(F, {TDB, Control});
6739   }
6740   case SystemZ::BI__builtin_tabort: {
6741     Value *Data = EmitScalarExpr(E->getArg(0));
6742     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
6743     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
6744   }
6745   case SystemZ::BI__builtin_non_tx_store: {
6746     Value *Address = EmitScalarExpr(E->getArg(0));
6747     Value *Data = EmitScalarExpr(E->getArg(1));
6748     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
6749     return Builder.CreateCall(F, {Data, Address});
6750   }
6751 
6752   // Vector builtins.  Note that most vector builtins are mapped automatically
6753   // to target-specific LLVM intrinsics.  The ones handled specially here can
6754   // be represented via standard LLVM IR, which is preferable to enable common
6755   // LLVM optimizations.
6756 
6757   case SystemZ::BI__builtin_s390_vpopctb:
6758   case SystemZ::BI__builtin_s390_vpopcth:
6759   case SystemZ::BI__builtin_s390_vpopctf:
6760   case SystemZ::BI__builtin_s390_vpopctg: {
6761     llvm::Type *ResultType = ConvertType(E->getType());
6762     Value *X = EmitScalarExpr(E->getArg(0));
6763     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
6764     return Builder.CreateCall(F, X);
6765   }
6766 
6767   case SystemZ::BI__builtin_s390_vclzb:
6768   case SystemZ::BI__builtin_s390_vclzh:
6769   case SystemZ::BI__builtin_s390_vclzf:
6770   case SystemZ::BI__builtin_s390_vclzg: {
6771     llvm::Type *ResultType = ConvertType(E->getType());
6772     Value *X = EmitScalarExpr(E->getArg(0));
6773     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6774     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6775     return Builder.CreateCall(F, {X, Undef});
6776   }
6777 
6778   case SystemZ::BI__builtin_s390_vctzb:
6779   case SystemZ::BI__builtin_s390_vctzh:
6780   case SystemZ::BI__builtin_s390_vctzf:
6781   case SystemZ::BI__builtin_s390_vctzg: {
6782     llvm::Type *ResultType = ConvertType(E->getType());
6783     Value *X = EmitScalarExpr(E->getArg(0));
6784     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6785     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
6786     return Builder.CreateCall(F, {X, Undef});
6787   }
6788 
6789   case SystemZ::BI__builtin_s390_vfsqdb: {
6790     llvm::Type *ResultType = ConvertType(E->getType());
6791     Value *X = EmitScalarExpr(E->getArg(0));
6792     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
6793     return Builder.CreateCall(F, X);
6794   }
6795   case SystemZ::BI__builtin_s390_vfmadb: {
6796     llvm::Type *ResultType = ConvertType(E->getType());
6797     Value *X = EmitScalarExpr(E->getArg(0));
6798     Value *Y = EmitScalarExpr(E->getArg(1));
6799     Value *Z = EmitScalarExpr(E->getArg(2));
6800     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6801     return Builder.CreateCall(F, {X, Y, Z});
6802   }
6803   case SystemZ::BI__builtin_s390_vfmsdb: {
6804     llvm::Type *ResultType = ConvertType(E->getType());
6805     Value *X = EmitScalarExpr(E->getArg(0));
6806     Value *Y = EmitScalarExpr(E->getArg(1));
6807     Value *Z = EmitScalarExpr(E->getArg(2));
6808     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6809     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6810     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6811   }
6812   case SystemZ::BI__builtin_s390_vflpdb: {
6813     llvm::Type *ResultType = ConvertType(E->getType());
6814     Value *X = EmitScalarExpr(E->getArg(0));
6815     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
6816     return Builder.CreateCall(F, X);
6817   }
6818   case SystemZ::BI__builtin_s390_vflndb: {
6819     llvm::Type *ResultType = ConvertType(E->getType());
6820     Value *X = EmitScalarExpr(E->getArg(0));
6821     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6822     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
6823     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
6824   }
6825   case SystemZ::BI__builtin_s390_vfidb: {
6826     llvm::Type *ResultType = ConvertType(E->getType());
6827     Value *X = EmitScalarExpr(E->getArg(0));
6828     // Constant-fold the M4 and M5 mask arguments.
6829     llvm::APSInt M4, M5;
6830     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
6831     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
6832     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
6833     (void)IsConstM4; (void)IsConstM5;
6834     // Check whether this instance of vfidb can be represented via a LLVM
6835     // standard intrinsic.  We only support some combinations of M4 and M5.
6836     Intrinsic::ID ID = Intrinsic::not_intrinsic;
6837     switch (M4.getZExtValue()) {
6838     default: break;
6839     case 0:  // IEEE-inexact exception allowed
6840       switch (M5.getZExtValue()) {
6841       default: break;
6842       case 0: ID = Intrinsic::rint; break;
6843       }
6844       break;
6845     case 4:  // IEEE-inexact exception suppressed
6846       switch (M5.getZExtValue()) {
6847       default: break;
6848       case 0: ID = Intrinsic::nearbyint; break;
6849       case 1: ID = Intrinsic::round; break;
6850       case 5: ID = Intrinsic::trunc; break;
6851       case 6: ID = Intrinsic::ceil; break;
6852       case 7: ID = Intrinsic::floor; break;
6853       }
6854       break;
6855     }
6856     if (ID != Intrinsic::not_intrinsic) {
6857       Function *F = CGM.getIntrinsic(ID, ResultType);
6858       return Builder.CreateCall(F, X);
6859     }
6860     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
6861     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
6862     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
6863     return Builder.CreateCall(F, {X, M4Value, M5Value});
6864   }
6865 
6866   // Vector intrisincs that output the post-instruction CC value.
6867 
6868 #define INTRINSIC_WITH_CC(NAME) \
6869     case SystemZ::BI__builtin_##NAME: \
6870       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
6871 
6872   INTRINSIC_WITH_CC(s390_vpkshs);
6873   INTRINSIC_WITH_CC(s390_vpksfs);
6874   INTRINSIC_WITH_CC(s390_vpksgs);
6875 
6876   INTRINSIC_WITH_CC(s390_vpklshs);
6877   INTRINSIC_WITH_CC(s390_vpklsfs);
6878   INTRINSIC_WITH_CC(s390_vpklsgs);
6879 
6880   INTRINSIC_WITH_CC(s390_vceqbs);
6881   INTRINSIC_WITH_CC(s390_vceqhs);
6882   INTRINSIC_WITH_CC(s390_vceqfs);
6883   INTRINSIC_WITH_CC(s390_vceqgs);
6884 
6885   INTRINSIC_WITH_CC(s390_vchbs);
6886   INTRINSIC_WITH_CC(s390_vchhs);
6887   INTRINSIC_WITH_CC(s390_vchfs);
6888   INTRINSIC_WITH_CC(s390_vchgs);
6889 
6890   INTRINSIC_WITH_CC(s390_vchlbs);
6891   INTRINSIC_WITH_CC(s390_vchlhs);
6892   INTRINSIC_WITH_CC(s390_vchlfs);
6893   INTRINSIC_WITH_CC(s390_vchlgs);
6894 
6895   INTRINSIC_WITH_CC(s390_vfaebs);
6896   INTRINSIC_WITH_CC(s390_vfaehs);
6897   INTRINSIC_WITH_CC(s390_vfaefs);
6898 
6899   INTRINSIC_WITH_CC(s390_vfaezbs);
6900   INTRINSIC_WITH_CC(s390_vfaezhs);
6901   INTRINSIC_WITH_CC(s390_vfaezfs);
6902 
6903   INTRINSIC_WITH_CC(s390_vfeebs);
6904   INTRINSIC_WITH_CC(s390_vfeehs);
6905   INTRINSIC_WITH_CC(s390_vfeefs);
6906 
6907   INTRINSIC_WITH_CC(s390_vfeezbs);
6908   INTRINSIC_WITH_CC(s390_vfeezhs);
6909   INTRINSIC_WITH_CC(s390_vfeezfs);
6910 
6911   INTRINSIC_WITH_CC(s390_vfenebs);
6912   INTRINSIC_WITH_CC(s390_vfenehs);
6913   INTRINSIC_WITH_CC(s390_vfenefs);
6914 
6915   INTRINSIC_WITH_CC(s390_vfenezbs);
6916   INTRINSIC_WITH_CC(s390_vfenezhs);
6917   INTRINSIC_WITH_CC(s390_vfenezfs);
6918 
6919   INTRINSIC_WITH_CC(s390_vistrbs);
6920   INTRINSIC_WITH_CC(s390_vistrhs);
6921   INTRINSIC_WITH_CC(s390_vistrfs);
6922 
6923   INTRINSIC_WITH_CC(s390_vstrcbs);
6924   INTRINSIC_WITH_CC(s390_vstrchs);
6925   INTRINSIC_WITH_CC(s390_vstrcfs);
6926 
6927   INTRINSIC_WITH_CC(s390_vstrczbs);
6928   INTRINSIC_WITH_CC(s390_vstrczhs);
6929   INTRINSIC_WITH_CC(s390_vstrczfs);
6930 
6931   INTRINSIC_WITH_CC(s390_vfcedbs);
6932   INTRINSIC_WITH_CC(s390_vfchdbs);
6933   INTRINSIC_WITH_CC(s390_vfchedbs);
6934 
6935   INTRINSIC_WITH_CC(s390_vftcidb);
6936 
6937 #undef INTRINSIC_WITH_CC
6938 
6939   default:
6940     return nullptr;
6941   }
6942 }
6943 
6944 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
6945                                              const CallExpr *E) {
6946   switch (BuiltinID) {
6947   case NVPTX::BI__nvvm_atom_add_gen_i:
6948   case NVPTX::BI__nvvm_atom_add_gen_l:
6949   case NVPTX::BI__nvvm_atom_add_gen_ll:
6950     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
6951 
6952   case NVPTX::BI__nvvm_atom_sub_gen_i:
6953   case NVPTX::BI__nvvm_atom_sub_gen_l:
6954   case NVPTX::BI__nvvm_atom_sub_gen_ll:
6955     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
6956 
6957   case NVPTX::BI__nvvm_atom_and_gen_i:
6958   case NVPTX::BI__nvvm_atom_and_gen_l:
6959   case NVPTX::BI__nvvm_atom_and_gen_ll:
6960     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
6961 
6962   case NVPTX::BI__nvvm_atom_or_gen_i:
6963   case NVPTX::BI__nvvm_atom_or_gen_l:
6964   case NVPTX::BI__nvvm_atom_or_gen_ll:
6965     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
6966 
6967   case NVPTX::BI__nvvm_atom_xor_gen_i:
6968   case NVPTX::BI__nvvm_atom_xor_gen_l:
6969   case NVPTX::BI__nvvm_atom_xor_gen_ll:
6970     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
6971 
6972   case NVPTX::BI__nvvm_atom_xchg_gen_i:
6973   case NVPTX::BI__nvvm_atom_xchg_gen_l:
6974   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
6975     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
6976 
6977   case NVPTX::BI__nvvm_atom_max_gen_i:
6978   case NVPTX::BI__nvvm_atom_max_gen_l:
6979   case NVPTX::BI__nvvm_atom_max_gen_ll:
6980     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
6981 
6982   case NVPTX::BI__nvvm_atom_max_gen_ui:
6983   case NVPTX::BI__nvvm_atom_max_gen_ul:
6984   case NVPTX::BI__nvvm_atom_max_gen_ull:
6985     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
6986 
6987   case NVPTX::BI__nvvm_atom_min_gen_i:
6988   case NVPTX::BI__nvvm_atom_min_gen_l:
6989   case NVPTX::BI__nvvm_atom_min_gen_ll:
6990     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
6991 
6992   case NVPTX::BI__nvvm_atom_min_gen_ui:
6993   case NVPTX::BI__nvvm_atom_min_gen_ul:
6994   case NVPTX::BI__nvvm_atom_min_gen_ull:
6995     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
6996 
6997   case NVPTX::BI__nvvm_atom_cas_gen_i:
6998   case NVPTX::BI__nvvm_atom_cas_gen_l:
6999   case NVPTX::BI__nvvm_atom_cas_gen_ll:
7000     return MakeAtomicCmpXchgValue(*this, E, true);
7001 
7002   case NVPTX::BI__nvvm_atom_add_gen_f: {
7003     Value *Ptr = EmitScalarExpr(E->getArg(0));
7004     Value *Val = EmitScalarExpr(E->getArg(1));
7005     // atomicrmw only deals with integer arguments so we need to use
7006     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
7007     Value *FnALAF32 =
7008         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
7009     return Builder.CreateCall(FnALAF32, {Ptr, Val});
7010   }
7011 
7012   default:
7013     return nullptr;
7014   }
7015 }
7016 
7017 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
7018                                                    const CallExpr *E) {
7019   switch (BuiltinID) {
7020   case WebAssembly::BI__builtin_wasm_page_size: {
7021     llvm::Type *ResultType = ConvertType(E->getType());
7022     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_page_size, ResultType);
7023     return Builder.CreateCall(Callee);
7024   }
7025 
7026   default:
7027     return nullptr;
7028   }
7029 }
7030