1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include <sstream>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 using namespace llvm;
34 
35 /// getBuiltinLibFunction - Given a builtin id for a function like
36 /// "__builtin_fabsf", return a Function* for "fabsf".
37 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
38                                                   unsigned BuiltinID) {
39   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
40 
41   // Get the name, skip over the __builtin_ prefix (if necessary).
42   StringRef Name;
43   GlobalDecl D(FD);
44 
45   // If the builtin has been declared explicitly with an assembler label,
46   // use the mangled name. This differs from the plain label on platforms
47   // that prefix labels.
48   if (FD->hasAttr<AsmLabelAttr>())
49     Name = getMangledName(D);
50   else
51     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
52 
53   llvm::FunctionType *Ty =
54     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
55 
56   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
57 }
58 
59 /// Emit the conversions required to turn the given value into an
60 /// integer of the given size.
61 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
62                         QualType T, llvm::IntegerType *IntType) {
63   V = CGF.EmitToMemory(V, T);
64 
65   if (V->getType()->isPointerTy())
66     return CGF.Builder.CreatePtrToInt(V, IntType);
67 
68   assert(V->getType() == IntType);
69   return V;
70 }
71 
72 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
73                           QualType T, llvm::Type *ResultType) {
74   V = CGF.EmitFromMemory(V, T);
75 
76   if (ResultType->isPointerTy())
77     return CGF.Builder.CreateIntToPtr(V, ResultType);
78 
79   assert(V->getType() == ResultType);
80   return V;
81 }
82 
83 /// Utility to insert an atomic instruction based on Instrinsic::ID
84 /// and the expression node.
85 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
86                                     llvm::AtomicRMWInst::BinOp Kind,
87                                     const CallExpr *E) {
88   QualType T = E->getType();
89   assert(E->getArg(0)->getType()->isPointerType());
90   assert(CGF.getContext().hasSameUnqualifiedType(T,
91                                   E->getArg(0)->getType()->getPointeeType()));
92   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
93 
94   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
95   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
96 
97   llvm::IntegerType *IntType =
98     llvm::IntegerType::get(CGF.getLLVMContext(),
99                            CGF.getContext().getTypeSize(T));
100   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
101 
102   llvm::Value *Args[2];
103   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
104   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
105   llvm::Type *ValueType = Args[1]->getType();
106   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
107 
108   llvm::Value *Result =
109       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
110                                   llvm::SequentiallyConsistent);
111   return EmitFromInt(CGF, Result, T, ValueType);
112 }
113 
114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118   // Convert the type of the pointer to a pointer to the stored type.
119   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120   Value *BC = CGF.Builder.CreateBitCast(
121       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123   LV.setNontemporal(true);
124   CGF.EmitStoreOfScalar(Val, LV, false);
125   return nullptr;
126 }
127 
128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132   LV.setNontemporal(true);
133   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137                                llvm::AtomicRMWInst::BinOp Kind,
138                                const CallExpr *E) {
139   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based Instrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146                                    llvm::AtomicRMWInst::BinOp Kind,
147                                    const CallExpr *E,
148                                    Instruction::BinaryOps Op,
149                                    bool Invert = false) {
150   QualType T = E->getType();
151   assert(E->getArg(0)->getType()->isPointerType());
152   assert(CGF.getContext().hasSameUnqualifiedType(T,
153                                   E->getArg(0)->getType()->getPointeeType()));
154   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159   llvm::IntegerType *IntType =
160     llvm::IntegerType::get(CGF.getLLVMContext(),
161                            CGF.getContext().getTypeSize(T));
162   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164   llvm::Value *Args[2];
165   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166   llvm::Type *ValueType = Args[1]->getType();
167   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170   llvm::Value *Result =
171       CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
172                                   llvm::SequentiallyConsistent);
173   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
174   if (Invert)
175     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
176                                      llvm::ConstantInt::get(IntType, -1));
177   Result = EmitFromInt(CGF, Result, T, ValueType);
178   return RValue::get(Result);
179 }
180 
181 /// @brief Utility to insert an atomic cmpxchg instruction.
182 ///
183 /// @param CGF The current codegen function.
184 /// @param E   Builtin call expression to convert to cmpxchg.
185 ///            arg0 - address to operate on
186 ///            arg1 - value to compare with
187 ///            arg2 - new value
188 /// @param ReturnBool Specifies whether to return success flag of
189 ///                   cmpxchg result or the old value.
190 ///
191 /// @returns result of cmpxchg, according to ReturnBool
192 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
193                                      bool ReturnBool) {
194   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
195   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
196   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
197 
198   llvm::IntegerType *IntType = llvm::IntegerType::get(
199       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
200   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
201 
202   Value *Args[3];
203   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
204   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
205   llvm::Type *ValueType = Args[1]->getType();
206   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
207   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
208 
209   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
210                                                 llvm::SequentiallyConsistent,
211                                                 llvm::SequentiallyConsistent);
212   if (ReturnBool)
213     // Extract boolean success flag and zext it to int.
214     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
215                                   CGF.ConvertType(E->getType()));
216   else
217     // Extract old value and emit it using the same type as compare value.
218     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
219                        ValueType);
220 }
221 
222 /// EmitFAbs - Emit a call to @llvm.fabs().
223 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
224   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
225   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
226   Call->setDoesNotAccessMemory();
227   return Call;
228 }
229 
230 /// Emit the computation of the sign bit for a floating point value. Returns
231 /// the i1 sign bit value.
232 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
233   LLVMContext &C = CGF.CGM.getLLVMContext();
234 
235   llvm::Type *Ty = V->getType();
236   int Width = Ty->getPrimitiveSizeInBits();
237   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
238   V = CGF.Builder.CreateBitCast(V, IntTy);
239   if (Ty->isPPC_FP128Ty()) {
240     // The higher-order double comes first, and so we need to truncate the
241     // pair to extract the overall sign. The order of the pair is the same
242     // in both little- and big-Endian modes.
243     Width >>= 1;
244     IntTy = llvm::IntegerType::get(C, Width);
245     V = CGF.Builder.CreateTrunc(V, IntTy);
246   }
247   Value *Zero = llvm::Constant::getNullValue(IntTy);
248   return CGF.Builder.CreateICmpSLT(V, Zero);
249 }
250 
251 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
252                               const CallExpr *E, llvm::Value *calleeValue) {
253   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
254                       ReturnValueSlot(), Fn);
255 }
256 
257 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
258 /// depending on IntrinsicID.
259 ///
260 /// \arg CGF The current codegen function.
261 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
262 /// \arg X The first argument to the llvm.*.with.overflow.*.
263 /// \arg Y The second argument to the llvm.*.with.overflow.*.
264 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
265 /// \returns The result (i.e. sum/product) returned by the intrinsic.
266 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
267                                           const llvm::Intrinsic::ID IntrinsicID,
268                                           llvm::Value *X, llvm::Value *Y,
269                                           llvm::Value *&Carry) {
270   // Make sure we have integers of the same width.
271   assert(X->getType() == Y->getType() &&
272          "Arguments must be the same type. (Did you forget to make sure both "
273          "arguments have the same integer width?)");
274 
275   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
276   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
277   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
278   return CGF.Builder.CreateExtractValue(Tmp, 0);
279 }
280 
281 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
282   llvm::Type *DestType = Int8PtrTy;
283   if (ArgValue->getType() != DestType)
284     ArgValue =
285         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
286 
287   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
288   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
289 }
290 
291 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
292                                         unsigned BuiltinID, const CallExpr *E,
293                                         ReturnValueSlot ReturnValue) {
294   // See if we can constant fold this builtin.  If so, don't emit it at all.
295   Expr::EvalResult Result;
296   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
297       !Result.hasSideEffects()) {
298     if (Result.Val.isInt())
299       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
300                                                 Result.Val.getInt()));
301     if (Result.Val.isFloat())
302       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
303                                                Result.Val.getFloat()));
304   }
305 
306   switch (BuiltinID) {
307   default: break;  // Handle intrinsics and libm functions below.
308   case Builtin::BI__builtin___CFStringMakeConstantString:
309   case Builtin::BI__builtin___NSStringMakeConstantString:
310     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
311   case Builtin::BI__builtin_stdarg_start:
312   case Builtin::BI__builtin_va_start:
313   case Builtin::BI__va_start:
314   case Builtin::BI__builtin_va_end:
315     return RValue::get(
316         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
317                            ? EmitScalarExpr(E->getArg(0))
318                            : EmitVAListRef(E->getArg(0)).getPointer(),
319                        BuiltinID != Builtin::BI__builtin_va_end));
320   case Builtin::BI__builtin_va_copy: {
321     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
322     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
323 
324     llvm::Type *Type = Int8PtrTy;
325 
326     DstPtr = Builder.CreateBitCast(DstPtr, Type);
327     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
328     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
329                                           {DstPtr, SrcPtr}));
330   }
331   case Builtin::BI__builtin_abs:
332   case Builtin::BI__builtin_labs:
333   case Builtin::BI__builtin_llabs: {
334     Value *ArgValue = EmitScalarExpr(E->getArg(0));
335 
336     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
337     Value *CmpResult =
338     Builder.CreateICmpSGE(ArgValue,
339                           llvm::Constant::getNullValue(ArgValue->getType()),
340                                                             "abscond");
341     Value *Result =
342       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
343 
344     return RValue::get(Result);
345   }
346   case Builtin::BI__builtin_fabs:
347   case Builtin::BI__builtin_fabsf:
348   case Builtin::BI__builtin_fabsl: {
349     Value *Arg1 = EmitScalarExpr(E->getArg(0));
350     Value *Result = EmitFAbs(*this, Arg1);
351     return RValue::get(Result);
352   }
353   case Builtin::BI__builtin_fmod:
354   case Builtin::BI__builtin_fmodf:
355   case Builtin::BI__builtin_fmodl: {
356     Value *Arg1 = EmitScalarExpr(E->getArg(0));
357     Value *Arg2 = EmitScalarExpr(E->getArg(1));
358     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
359     return RValue::get(Result);
360   }
361 
362   case Builtin::BI__builtin_conj:
363   case Builtin::BI__builtin_conjf:
364   case Builtin::BI__builtin_conjl: {
365     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
366     Value *Real = ComplexVal.first;
367     Value *Imag = ComplexVal.second;
368     Value *Zero =
369       Imag->getType()->isFPOrFPVectorTy()
370         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
371         : llvm::Constant::getNullValue(Imag->getType());
372 
373     Imag = Builder.CreateFSub(Zero, Imag, "sub");
374     return RValue::getComplex(std::make_pair(Real, Imag));
375   }
376   case Builtin::BI__builtin_creal:
377   case Builtin::BI__builtin_crealf:
378   case Builtin::BI__builtin_creall:
379   case Builtin::BIcreal:
380   case Builtin::BIcrealf:
381   case Builtin::BIcreall: {
382     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
383     return RValue::get(ComplexVal.first);
384   }
385 
386   case Builtin::BI__builtin_cimag:
387   case Builtin::BI__builtin_cimagf:
388   case Builtin::BI__builtin_cimagl:
389   case Builtin::BIcimag:
390   case Builtin::BIcimagf:
391   case Builtin::BIcimagl: {
392     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
393     return RValue::get(ComplexVal.second);
394   }
395 
396   case Builtin::BI__builtin_ctzs:
397   case Builtin::BI__builtin_ctz:
398   case Builtin::BI__builtin_ctzl:
399   case Builtin::BI__builtin_ctzll: {
400     Value *ArgValue = EmitScalarExpr(E->getArg(0));
401 
402     llvm::Type *ArgType = ArgValue->getType();
403     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
404 
405     llvm::Type *ResultType = ConvertType(E->getType());
406     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
407     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
408     if (Result->getType() != ResultType)
409       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
410                                      "cast");
411     return RValue::get(Result);
412   }
413   case Builtin::BI__builtin_clzs:
414   case Builtin::BI__builtin_clz:
415   case Builtin::BI__builtin_clzl:
416   case Builtin::BI__builtin_clzll: {
417     Value *ArgValue = EmitScalarExpr(E->getArg(0));
418 
419     llvm::Type *ArgType = ArgValue->getType();
420     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
421 
422     llvm::Type *ResultType = ConvertType(E->getType());
423     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
424     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
425     if (Result->getType() != ResultType)
426       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
427                                      "cast");
428     return RValue::get(Result);
429   }
430   case Builtin::BI__builtin_ffs:
431   case Builtin::BI__builtin_ffsl:
432   case Builtin::BI__builtin_ffsll: {
433     // ffs(x) -> x ? cttz(x) + 1 : 0
434     Value *ArgValue = EmitScalarExpr(E->getArg(0));
435 
436     llvm::Type *ArgType = ArgValue->getType();
437     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
438 
439     llvm::Type *ResultType = ConvertType(E->getType());
440     Value *Tmp =
441         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
442                           llvm::ConstantInt::get(ArgType, 1));
443     Value *Zero = llvm::Constant::getNullValue(ArgType);
444     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
445     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
446     if (Result->getType() != ResultType)
447       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
448                                      "cast");
449     return RValue::get(Result);
450   }
451   case Builtin::BI__builtin_parity:
452   case Builtin::BI__builtin_parityl:
453   case Builtin::BI__builtin_parityll: {
454     // parity(x) -> ctpop(x) & 1
455     Value *ArgValue = EmitScalarExpr(E->getArg(0));
456 
457     llvm::Type *ArgType = ArgValue->getType();
458     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
459 
460     llvm::Type *ResultType = ConvertType(E->getType());
461     Value *Tmp = Builder.CreateCall(F, ArgValue);
462     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
463     if (Result->getType() != ResultType)
464       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
465                                      "cast");
466     return RValue::get(Result);
467   }
468   case Builtin::BI__builtin_popcount:
469   case Builtin::BI__builtin_popcountl:
470   case Builtin::BI__builtin_popcountll: {
471     Value *ArgValue = EmitScalarExpr(E->getArg(0));
472 
473     llvm::Type *ArgType = ArgValue->getType();
474     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
475 
476     llvm::Type *ResultType = ConvertType(E->getType());
477     Value *Result = Builder.CreateCall(F, ArgValue);
478     if (Result->getType() != ResultType)
479       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
480                                      "cast");
481     return RValue::get(Result);
482   }
483   case Builtin::BI__builtin_unpredictable: {
484     // Always return the argument of __builtin_unpredictable. LLVM does not
485     // handle this builtin. Metadata for this builtin should be added directly
486     // to instructions such as branches or switches that use it.
487     return RValue::get(EmitScalarExpr(E->getArg(0)));
488   }
489   case Builtin::BI__builtin_expect: {
490     Value *ArgValue = EmitScalarExpr(E->getArg(0));
491     llvm::Type *ArgType = ArgValue->getType();
492 
493     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
494     // Don't generate llvm.expect on -O0 as the backend won't use it for
495     // anything.
496     // Note, we still IRGen ExpectedValue because it could have side-effects.
497     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
498       return RValue::get(ArgValue);
499 
500     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
501     Value *Result =
502         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
503     return RValue::get(Result);
504   }
505   case Builtin::BI__builtin_assume_aligned: {
506     Value *PtrValue = EmitScalarExpr(E->getArg(0));
507     Value *OffsetValue =
508       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
509 
510     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
511     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
512     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
513 
514     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
515     return RValue::get(PtrValue);
516   }
517   case Builtin::BI__assume:
518   case Builtin::BI__builtin_assume: {
519     if (E->getArg(0)->HasSideEffects(getContext()))
520       return RValue::get(nullptr);
521 
522     Value *ArgValue = EmitScalarExpr(E->getArg(0));
523     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
524     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
525   }
526   case Builtin::BI__builtin_bswap16:
527   case Builtin::BI__builtin_bswap32:
528   case Builtin::BI__builtin_bswap64: {
529     Value *ArgValue = EmitScalarExpr(E->getArg(0));
530     llvm::Type *ArgType = ArgValue->getType();
531     Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
532     return RValue::get(Builder.CreateCall(F, ArgValue));
533   }
534   case Builtin::BI__builtin_object_size: {
535     // We rely on constant folding to deal with expressions with side effects.
536     assert(!E->getArg(0)->HasSideEffects(getContext()) &&
537            "should have been constant folded");
538 
539     // We pass this builtin onto the optimizer so that it can
540     // figure out the object size in more complex cases.
541     llvm::Type *ResType = ConvertType(E->getType());
542 
543     // LLVM only supports 0 and 2, make sure that we pass along that
544     // as a boolean.
545     Value *Ty = EmitScalarExpr(E->getArg(1));
546     ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
547     assert(CI);
548     uint64_t val = CI->getZExtValue();
549     CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
550     // FIXME: Get right address space.
551     llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
552     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
553     return RValue::get(
554         Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI}));
555   }
556   case Builtin::BI__builtin_prefetch: {
557     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
558     // FIXME: Technically these constants should of type 'int', yes?
559     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
560       llvm::ConstantInt::get(Int32Ty, 0);
561     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
562       llvm::ConstantInt::get(Int32Ty, 3);
563     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
564     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
565     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
566   }
567   case Builtin::BI__builtin_readcyclecounter: {
568     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
569     return RValue::get(Builder.CreateCall(F));
570   }
571   case Builtin::BI__builtin___clear_cache: {
572     Value *Begin = EmitScalarExpr(E->getArg(0));
573     Value *End = EmitScalarExpr(E->getArg(1));
574     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
575     return RValue::get(Builder.CreateCall(F, {Begin, End}));
576   }
577   case Builtin::BI__builtin_trap:
578     return RValue::get(EmitTrapCall(Intrinsic::trap));
579   case Builtin::BI__debugbreak:
580     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
581   case Builtin::BI__builtin_unreachable: {
582     if (SanOpts.has(SanitizerKind::Unreachable)) {
583       SanitizerScope SanScope(this);
584       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
585                                SanitizerKind::Unreachable),
586                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
587                 None);
588     } else
589       Builder.CreateUnreachable();
590 
591     // We do need to preserve an insertion point.
592     EmitBlock(createBasicBlock("unreachable.cont"));
593 
594     return RValue::get(nullptr);
595   }
596 
597   case Builtin::BI__builtin_powi:
598   case Builtin::BI__builtin_powif:
599   case Builtin::BI__builtin_powil: {
600     Value *Base = EmitScalarExpr(E->getArg(0));
601     Value *Exponent = EmitScalarExpr(E->getArg(1));
602     llvm::Type *ArgType = Base->getType();
603     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
604     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
605   }
606 
607   case Builtin::BI__builtin_isgreater:
608   case Builtin::BI__builtin_isgreaterequal:
609   case Builtin::BI__builtin_isless:
610   case Builtin::BI__builtin_islessequal:
611   case Builtin::BI__builtin_islessgreater:
612   case Builtin::BI__builtin_isunordered: {
613     // Ordered comparisons: we know the arguments to these are matching scalar
614     // floating point values.
615     Value *LHS = EmitScalarExpr(E->getArg(0));
616     Value *RHS = EmitScalarExpr(E->getArg(1));
617 
618     switch (BuiltinID) {
619     default: llvm_unreachable("Unknown ordered comparison");
620     case Builtin::BI__builtin_isgreater:
621       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
622       break;
623     case Builtin::BI__builtin_isgreaterequal:
624       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
625       break;
626     case Builtin::BI__builtin_isless:
627       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
628       break;
629     case Builtin::BI__builtin_islessequal:
630       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
631       break;
632     case Builtin::BI__builtin_islessgreater:
633       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
634       break;
635     case Builtin::BI__builtin_isunordered:
636       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
637       break;
638     }
639     // ZExt bool to int type.
640     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
641   }
642   case Builtin::BI__builtin_isnan: {
643     Value *V = EmitScalarExpr(E->getArg(0));
644     V = Builder.CreateFCmpUNO(V, V, "cmp");
645     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
646   }
647 
648   case Builtin::BI__builtin_isinf: {
649     // isinf(x) --> fabs(x) == infinity
650     Value *V = EmitScalarExpr(E->getArg(0));
651     V = EmitFAbs(*this, V);
652 
653     V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
654     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
655   }
656 
657   case Builtin::BI__builtin_isinf_sign: {
658     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
659     Value *Arg = EmitScalarExpr(E->getArg(0));
660     Value *AbsArg = EmitFAbs(*this, Arg);
661     Value *IsInf = Builder.CreateFCmpOEQ(
662         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
663     Value *IsNeg = EmitSignBit(*this, Arg);
664 
665     llvm::Type *IntTy = ConvertType(E->getType());
666     Value *Zero = Constant::getNullValue(IntTy);
667     Value *One = ConstantInt::get(IntTy, 1);
668     Value *NegativeOne = ConstantInt::get(IntTy, -1);
669     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
670     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
671     return RValue::get(Result);
672   }
673 
674   case Builtin::BI__builtin_isnormal: {
675     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
676     Value *V = EmitScalarExpr(E->getArg(0));
677     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
678 
679     Value *Abs = EmitFAbs(*this, V);
680     Value *IsLessThanInf =
681       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
682     APFloat Smallest = APFloat::getSmallestNormalized(
683                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
684     Value *IsNormal =
685       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
686                             "isnormal");
687     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
688     V = Builder.CreateAnd(V, IsNormal, "and");
689     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
690   }
691 
692   case Builtin::BI__builtin_isfinite: {
693     // isfinite(x) --> x == x && fabs(x) != infinity;
694     Value *V = EmitScalarExpr(E->getArg(0));
695     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
696 
697     Value *Abs = EmitFAbs(*this, V);
698     Value *IsNotInf =
699       Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
700 
701     V = Builder.CreateAnd(Eq, IsNotInf, "and");
702     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
703   }
704 
  case Builtin::BI__builtin_fpclassify: {
    // Lower __builtin_fpclassify as a chain of compare-and-branch blocks that
    // all feed one PHI in a common exit block. As used below, argument 5 is
    // the value being classified and arguments 0..4 supply the results for
    // NaN, infinity, normal, subnormal and zero respectively.
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create the exit block and the result PHI up front so that every test
    // below can register its incoming value as soon as it is emitted. Four
    // incoming edges are reserved: zero, NaN, infinity, and the final
    // normal/subnormal select.
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    // NOTE(review): the incoming edge is recorded as coming from Begin, which
    // assumes emitting the literal argument does not move the insertion point
    // to a different block -- confirm.
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    // (an unordered self-comparison is how "V != V" is expressed here)
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    // The absolute value computed here is reused by the normal/subnormal
    // test in the next block.
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    // MIN_NORMAL is the smallest normalized value of the argument's
    // floating-point semantics. This last step needs no branch: a select
    // picks between the two remaining literals.
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    // Leave the builder positioned in the exit block so subsequent code is
    // emitted after the classification.
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }
762 
763   case Builtin::BIalloca:
764   case Builtin::BI_alloca:
765   case Builtin::BI__builtin_alloca: {
766     Value *Size = EmitScalarExpr(E->getArg(0));
767     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
768   }
769   case Builtin::BIbzero:
770   case Builtin::BI__builtin_bzero: {
771     Address Dest = EmitPointerWithAlignment(E->getArg(0));
772     Value *SizeVal = EmitScalarExpr(E->getArg(1));
773     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
774                         E->getArg(0)->getExprLoc(), FD, 0);
775     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
776     return RValue::get(Dest.getPointer());
777   }
778   case Builtin::BImemcpy:
779   case Builtin::BI__builtin_memcpy: {
780     Address Dest = EmitPointerWithAlignment(E->getArg(0));
781     Address Src = EmitPointerWithAlignment(E->getArg(1));
782     Value *SizeVal = EmitScalarExpr(E->getArg(2));
783     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
784                         E->getArg(0)->getExprLoc(), FD, 0);
785     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
786                         E->getArg(1)->getExprLoc(), FD, 1);
787     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
788     return RValue::get(Dest.getPointer());
789   }
790 
791   case Builtin::BI__builtin___memcpy_chk: {
792     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
793     llvm::APSInt Size, DstSize;
794     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
795         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
796       break;
797     if (Size.ugt(DstSize))
798       break;
799     Address Dest = EmitPointerWithAlignment(E->getArg(0));
800     Address Src = EmitPointerWithAlignment(E->getArg(1));
801     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
802     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
803     return RValue::get(Dest.getPointer());
804   }
805 
806   case Builtin::BI__builtin_objc_memmove_collectable: {
807     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
808     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
809     Value *SizeVal = EmitScalarExpr(E->getArg(2));
810     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
811                                                   DestAddr, SrcAddr, SizeVal);
812     return RValue::get(DestAddr.getPointer());
813   }
814 
815   case Builtin::BI__builtin___memmove_chk: {
816     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
817     llvm::APSInt Size, DstSize;
818     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
819         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
820       break;
821     if (Size.ugt(DstSize))
822       break;
823     Address Dest = EmitPointerWithAlignment(E->getArg(0));
824     Address Src = EmitPointerWithAlignment(E->getArg(1));
825     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
826     Builder.CreateMemMove(Dest, Src, SizeVal, false);
827     return RValue::get(Dest.getPointer());
828   }
829 
830   case Builtin::BImemmove:
831   case Builtin::BI__builtin_memmove: {
832     Address Dest = EmitPointerWithAlignment(E->getArg(0));
833     Address Src = EmitPointerWithAlignment(E->getArg(1));
834     Value *SizeVal = EmitScalarExpr(E->getArg(2));
835     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
836                         E->getArg(0)->getExprLoc(), FD, 0);
837     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
838                         E->getArg(1)->getExprLoc(), FD, 1);
839     Builder.CreateMemMove(Dest, Src, SizeVal, false);
840     return RValue::get(Dest.getPointer());
841   }
842   case Builtin::BImemset:
843   case Builtin::BI__builtin_memset: {
844     Address Dest = EmitPointerWithAlignment(E->getArg(0));
845     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
846                                          Builder.getInt8Ty());
847     Value *SizeVal = EmitScalarExpr(E->getArg(2));
848     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
849                         E->getArg(0)->getExprLoc(), FD, 0);
850     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
851     return RValue::get(Dest.getPointer());
852   }
853   case Builtin::BI__builtin___memset_chk: {
854     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
855     llvm::APSInt Size, DstSize;
856     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
857         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
858       break;
859     if (Size.ugt(DstSize))
860       break;
861     Address Dest = EmitPointerWithAlignment(E->getArg(0));
862     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
863                                          Builder.getInt8Ty());
864     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
865     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
866     return RValue::get(Dest.getPointer());
867   }
868   case Builtin::BI__builtin_dwarf_cfa: {
869     // The offset in bytes from the first argument to the CFA.
870     //
871     // Why on earth is this in the frontend?  Is there any reason at
872     // all that the backend can't reasonably determine this while
873     // lowering llvm.eh.dwarf.cfa()?
874     //
875     // TODO: If there's a satisfactory reason, add a target hook for
876     // this instead of hard-coding 0, which is correct for most targets.
877     int32_t Offset = 0;
878 
879     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
880     return RValue::get(Builder.CreateCall(F,
881                                       llvm::ConstantInt::get(Int32Ty, Offset)));
882   }
883   case Builtin::BI__builtin_return_address: {
884     Value *Depth =
885         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
886     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
887     return RValue::get(Builder.CreateCall(F, Depth));
888   }
889   case Builtin::BI__builtin_frame_address: {
890     Value *Depth =
891         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
892     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
893     return RValue::get(Builder.CreateCall(F, Depth));
894   }
895   case Builtin::BI__builtin_extract_return_addr: {
896     Value *Address = EmitScalarExpr(E->getArg(0));
897     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
898     return RValue::get(Result);
899   }
900   case Builtin::BI__builtin_frob_return_addr: {
901     Value *Address = EmitScalarExpr(E->getArg(0));
902     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
903     return RValue::get(Result);
904   }
905   case Builtin::BI__builtin_dwarf_sp_column: {
906     llvm::IntegerType *Ty
907       = cast<llvm::IntegerType>(ConvertType(E->getType()));
908     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
909     if (Column == -1) {
910       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
911       return RValue::get(llvm::UndefValue::get(Ty));
912     }
913     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
914   }
915   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
916     Value *Address = EmitScalarExpr(E->getArg(0));
917     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
918       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
919     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
920   }
921   case Builtin::BI__builtin_eh_return: {
922     Value *Int = EmitScalarExpr(E->getArg(0));
923     Value *Ptr = EmitScalarExpr(E->getArg(1));
924 
925     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
926     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
927            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
928     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
929                                   ? Intrinsic::eh_return_i32
930                                   : Intrinsic::eh_return_i64);
931     Builder.CreateCall(F, {Int, Ptr});
932     Builder.CreateUnreachable();
933 
934     // We do need to preserve an insertion point.
935     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
936 
937     return RValue::get(nullptr);
938   }
939   case Builtin::BI__builtin_unwind_init: {
940     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
941     return RValue::get(Builder.CreateCall(F));
942   }
943   case Builtin::BI__builtin_extend_pointer: {
944     // Extends a pointer to the size of an _Unwind_Word, which is
945     // uint64_t on all platforms.  Generally this gets poked into a
946     // register and eventually used as an address, so if the
947     // addressing registers are wider than pointers and the platform
948     // doesn't implicitly ignore high-order bits when doing
949     // addressing, we need to make sure we zext / sext based on
950     // the platform's expectations.
951     //
952     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
953 
954     // Cast the pointer to intptr_t.
955     Value *Ptr = EmitScalarExpr(E->getArg(0));
956     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
957 
958     // If that's 64 bits, we're done.
959     if (IntPtrTy->getBitWidth() == 64)
960       return RValue::get(Result);
961 
962     // Otherwise, ask the codegen data what to do.
963     if (getTargetHooks().extendPointerWithSExt())
964       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
965     else
966       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
967   }
  case Builtin::BI__builtin_setjmp: {
    // Lower __builtin_setjmp by filling in parts of the buffer by hand (frame
    // address and saved stack pointer) and then calling llvm.eh.sjlj.setjmp
    // on the buffer. The builtin's value is the result of that call.

    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    // This is llvm.frameaddress(0), written to the first slot of the buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    // The value comes from llvm.stacksave and goes into element index 2,
    // addressed with a pointer-sized element stride.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    // The intrinsic is called with the buffer cast to i8*.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
990   case Builtin::BI__builtin_longjmp: {
991     Value *Buf = EmitScalarExpr(E->getArg(0));
992     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
993 
994     // Call LLVM's EH longjmp, which is lightweight.
995     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
996 
997     // longjmp doesn't return; mark this as unreachable.
998     Builder.CreateUnreachable();
999 
1000     // We do need to preserve an insertion point.
1001     EmitBlock(createBasicBlock("longjmp.cont"));
1002 
1003     return RValue::get(nullptr);
1004   }
  // The __sync_* spellings without a size suffix. Per the message below they
  // are not expected to survive semantic analysis (the size-suffixed _1/_2/
  // _4/_8/_16 forms are what the cases further down handle), so reaching any
  // of them here is treated as an internal error.
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  // __sync_fetch_and_<op>_N: every operand size of a given operation is
  // handed to EmitBinaryAtomic with the matching AtomicRMWInst operation.
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1059 
  // Clang extensions: not overloaded yet.
  // Unlike the cases above there are no size-suffixed variants here; each
  // builtin is handed to EmitBinaryAtomic with the signed (Min/Max) or
  // unsigned (UMin/UMax) AtomicRMWInst operation.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1069 
  // __sync_<op>_and_fetch_N: handed to EmitBinaryAtomicPost, which receives
  // both the AtomicRMWInst operation and the corresponding plain binary
  // opcode.
  // NOTE(review): the binary opcode is presumably used to recompute the
  // post-operation value from the atomicrmw result -- confirm against
  // EmitBinaryAtomicPost.
  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  // There is no Nand binary opcode, so nand pairs Instruction::And with an
  // extra trailing `true`.
  // NOTE(review): the flag presumably asks for the And result to be
  // inverted -- confirm against EmitBinaryAtomicPost.
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);
1112 
  // __sync_val_compare_and_swap_N and __sync_bool_compare_and_swap_N share
  // MakeAtomicCmpXchgValue; the only difference is the final boolean
  // argument (false for the value-returning form, true for the bool form).
  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1126 
  // __sync_swap_N and __sync_lock_test_and_set_N are lowered identically:
  // both go to EmitBinaryAtomic with the Xchg operation.
  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1140 
1141   case Builtin::BI__sync_lock_release_1:
1142   case Builtin::BI__sync_lock_release_2:
1143   case Builtin::BI__sync_lock_release_4:
1144   case Builtin::BI__sync_lock_release_8:
1145   case Builtin::BI__sync_lock_release_16: {
1146     Value *Ptr = EmitScalarExpr(E->getArg(0));
1147     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1148     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1149     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1150                                              StoreSize.getQuantity() * 8);
1151     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1152     llvm::StoreInst *Store =
1153       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1154                                  StoreSize);
1155     Store->setAtomic(llvm::Release);
1156     return RValue::get(nullptr);
1157   }
1158 
  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    // The builtin produces no value; only the fence is emitted.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(nullptr);
  }
1170 
  // The nontemporal load/store builtins are emitted entirely by their
  // dedicated helpers; whatever the helper returns becomes the builtin's
  // value.
  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
1175   case Builtin::BI__c11_atomic_is_lock_free:
1176   case Builtin::BI__atomic_is_lock_free: {
1177     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1178     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1179     // _Atomic(T) is always properly-aligned.
1180     const char *LibCallName = "__atomic_is_lock_free";
1181     CallArgList Args;
1182     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1183              getContext().getSizeType());
1184     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1185       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1186                getContext().VoidPtrTy);
1187     else
1188       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1189                getContext().VoidPtrTy);
1190     const CGFunctionInfo &FuncInfo =
1191         CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1192                                                FunctionType::ExtInfo(),
1193                                                RequiredArgs::All);
1194     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1195     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1196     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1197   }
1198 
1199   case Builtin::BI__atomic_test_and_set: {
1200     // Look at the argument type to determine whether this is a volatile
1201     // operation. The parameter type is always volatile.
1202     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1203     bool Volatile =
1204         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1205 
1206     Value *Ptr = EmitScalarExpr(E->getArg(0));
1207     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1208     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1209     Value *NewVal = Builder.getInt8(1);
1210     Value *Order = EmitScalarExpr(E->getArg(1));
1211     if (isa<llvm::ConstantInt>(Order)) {
1212       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1213       AtomicRMWInst *Result = nullptr;
1214       switch (ord) {
1215       case 0:  // memory_order_relaxed
1216       default: // invalid order
1217         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1218                                          Ptr, NewVal,
1219                                          llvm::Monotonic);
1220         break;
1221       case 1:  // memory_order_consume
1222       case 2:  // memory_order_acquire
1223         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1224                                          Ptr, NewVal,
1225                                          llvm::Acquire);
1226         break;
1227       case 3:  // memory_order_release
1228         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1229                                          Ptr, NewVal,
1230                                          llvm::Release);
1231         break;
1232       case 4:  // memory_order_acq_rel
1233         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1234                                          Ptr, NewVal,
1235                                          llvm::AcquireRelease);
1236         break;
1237       case 5:  // memory_order_seq_cst
1238         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1239                                          Ptr, NewVal,
1240                                          llvm::SequentiallyConsistent);
1241         break;
1242       }
1243       Result->setVolatile(Volatile);
1244       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1245     }
1246 
1247     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1248 
1249     llvm::BasicBlock *BBs[5] = {
1250       createBasicBlock("monotonic", CurFn),
1251       createBasicBlock("acquire", CurFn),
1252       createBasicBlock("release", CurFn),
1253       createBasicBlock("acqrel", CurFn),
1254       createBasicBlock("seqcst", CurFn)
1255     };
1256     llvm::AtomicOrdering Orders[5] = {
1257       llvm::Monotonic, llvm::Acquire, llvm::Release,
1258       llvm::AcquireRelease, llvm::SequentiallyConsistent
1259     };
1260 
1261     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1262     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1263 
1264     Builder.SetInsertPoint(ContBB);
1265     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1266 
1267     for (unsigned i = 0; i < 5; ++i) {
1268       Builder.SetInsertPoint(BBs[i]);
1269       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1270                                                    Ptr, NewVal, Orders[i]);
1271       RMW->setVolatile(Volatile);
1272       Result->addIncoming(RMW, BBs[i]);
1273       Builder.CreateBr(ContBB);
1274     }
1275 
1276     SI->addCase(Builder.getInt32(0), BBs[0]);
1277     SI->addCase(Builder.getInt32(1), BBs[1]);
1278     SI->addCase(Builder.getInt32(2), BBs[1]);
1279     SI->addCase(Builder.getInt32(3), BBs[2]);
1280     SI->addCase(Builder.getInt32(4), BBs[3]);
1281     SI->addCase(Builder.getInt32(5), BBs[4]);
1282 
1283     Builder.SetInsertPoint(ContBB);
1284     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1285   }
1286 
1287   case Builtin::BI__atomic_clear: {
1288     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1289     bool Volatile =
1290         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1291 
1292     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1293     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1294     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1295     Value *NewVal = Builder.getInt8(0);
1296     Value *Order = EmitScalarExpr(E->getArg(1));
1297     if (isa<llvm::ConstantInt>(Order)) {
1298       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1299       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1300       switch (ord) {
1301       case 0:  // memory_order_relaxed
1302       default: // invalid order
1303         Store->setOrdering(llvm::Monotonic);
1304         break;
1305       case 3:  // memory_order_release
1306         Store->setOrdering(llvm::Release);
1307         break;
1308       case 5:  // memory_order_seq_cst
1309         Store->setOrdering(llvm::SequentiallyConsistent);
1310         break;
1311       }
1312       return RValue::get(nullptr);
1313     }
1314 
1315     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1316 
1317     llvm::BasicBlock *BBs[3] = {
1318       createBasicBlock("monotonic", CurFn),
1319       createBasicBlock("release", CurFn),
1320       createBasicBlock("seqcst", CurFn)
1321     };
1322     llvm::AtomicOrdering Orders[3] = {
1323       llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1324     };
1325 
1326     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1327     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1328 
1329     for (unsigned i = 0; i < 3; ++i) {
1330       Builder.SetInsertPoint(BBs[i]);
1331       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1332       Store->setOrdering(Orders[i]);
1333       Builder.CreateBr(ContBB);
1334     }
1335 
1336     SI->addCase(Builder.getInt32(0), BBs[0]);
1337     SI->addCase(Builder.getInt32(3), BBs[1]);
1338     SI->addCase(Builder.getInt32(5), BBs[2]);
1339 
1340     Builder.SetInsertPoint(ContBB);
1341     return RValue::get(nullptr);
1342   }
1343 
1344   case Builtin::BI__atomic_thread_fence:
1345   case Builtin::BI__atomic_signal_fence:
1346   case Builtin::BI__c11_atomic_thread_fence:
1347   case Builtin::BI__c11_atomic_signal_fence: {
1348     llvm::SynchronizationScope Scope;
1349     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1350         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1351       Scope = llvm::SingleThread;
1352     else
1353       Scope = llvm::CrossThread;
1354     Value *Order = EmitScalarExpr(E->getArg(0));
1355     if (isa<llvm::ConstantInt>(Order)) {
1356       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1357       switch (ord) {
1358       case 0:  // memory_order_relaxed
1359       default: // invalid order
1360         break;
1361       case 1:  // memory_order_consume
1362       case 2:  // memory_order_acquire
1363         Builder.CreateFence(llvm::Acquire, Scope);
1364         break;
1365       case 3:  // memory_order_release
1366         Builder.CreateFence(llvm::Release, Scope);
1367         break;
1368       case 4:  // memory_order_acq_rel
1369         Builder.CreateFence(llvm::AcquireRelease, Scope);
1370         break;
1371       case 5:  // memory_order_seq_cst
1372         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1373         break;
1374       }
1375       return RValue::get(nullptr);
1376     }
1377 
1378     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1379     AcquireBB = createBasicBlock("acquire", CurFn);
1380     ReleaseBB = createBasicBlock("release", CurFn);
1381     AcqRelBB = createBasicBlock("acqrel", CurFn);
1382     SeqCstBB = createBasicBlock("seqcst", CurFn);
1383     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1384 
1385     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1386     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1387 
1388     Builder.SetInsertPoint(AcquireBB);
1389     Builder.CreateFence(llvm::Acquire, Scope);
1390     Builder.CreateBr(ContBB);
1391     SI->addCase(Builder.getInt32(1), AcquireBB);
1392     SI->addCase(Builder.getInt32(2), AcquireBB);
1393 
1394     Builder.SetInsertPoint(ReleaseBB);
1395     Builder.CreateFence(llvm::Release, Scope);
1396     Builder.CreateBr(ContBB);
1397     SI->addCase(Builder.getInt32(3), ReleaseBB);
1398 
1399     Builder.SetInsertPoint(AcqRelBB);
1400     Builder.CreateFence(llvm::AcquireRelease, Scope);
1401     Builder.CreateBr(ContBB);
1402     SI->addCase(Builder.getInt32(4), AcqRelBB);
1403 
1404     Builder.SetInsertPoint(SeqCstBB);
1405     Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1406     Builder.CreateBr(ContBB);
1407     SI->addCase(Builder.getInt32(5), SeqCstBB);
1408 
1409     Builder.SetInsertPoint(ContBB);
1410     return RValue::get(nullptr);
1411   }
1412 
1413     // Library functions with special handling.
1414   case Builtin::BIsqrt:
1415   case Builtin::BIsqrtf:
1416   case Builtin::BIsqrtl: {
1417     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1418     // in finite- or unsafe-math mode (the intrinsic has different semantics
1419     // for handling negative numbers compared to the library function, so
1420     // -fmath-errno=0 is not enough).
1421     if (!FD->hasAttr<ConstAttr>())
1422       break;
1423     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1424           CGM.getCodeGenOpts().NoNaNsFPMath))
1425       break;
1426     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1427     llvm::Type *ArgType = Arg0->getType();
1428     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1429     return RValue::get(Builder.CreateCall(F, Arg0));
1430   }
1431 
1432   case Builtin::BI__builtin_pow:
1433   case Builtin::BI__builtin_powf:
1434   case Builtin::BI__builtin_powl:
1435   case Builtin::BIpow:
1436   case Builtin::BIpowf:
1437   case Builtin::BIpowl: {
1438     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1439     if (!FD->hasAttr<ConstAttr>())
1440       break;
1441     Value *Base = EmitScalarExpr(E->getArg(0));
1442     Value *Exponent = EmitScalarExpr(E->getArg(1));
1443     llvm::Type *ArgType = Base->getType();
1444     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1445     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1446   }
1447 
1448   case Builtin::BIfma:
1449   case Builtin::BIfmaf:
1450   case Builtin::BIfmal:
1451   case Builtin::BI__builtin_fma:
1452   case Builtin::BI__builtin_fmaf:
1453   case Builtin::BI__builtin_fmal: {
1454     // Rewrite fma to intrinsic.
1455     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1456     llvm::Type *ArgType = FirstArg->getType();
1457     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1458     return RValue::get(
1459         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1460                                EmitScalarExpr(E->getArg(2))}));
1461   }
1462 
1463   case Builtin::BI__builtin_signbit:
1464   case Builtin::BI__builtin_signbitf:
1465   case Builtin::BI__builtin_signbitl: {
1466     return RValue::get(
1467         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1468                            ConvertType(E->getType())));
1469   }
1470   case Builtin::BI__builtin_annotation: {
1471     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1472     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1473                                       AnnVal->getType());
1474 
1475     // Get the annotation string, go through casts. Sema requires this to be a
1476     // non-wide string literal, potentially casted, so the cast<> is safe.
1477     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1478     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1479     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1480   }
1481   case Builtin::BI__builtin_addcb:
1482   case Builtin::BI__builtin_addcs:
1483   case Builtin::BI__builtin_addc:
1484   case Builtin::BI__builtin_addcl:
1485   case Builtin::BI__builtin_addcll:
1486   case Builtin::BI__builtin_subcb:
1487   case Builtin::BI__builtin_subcs:
1488   case Builtin::BI__builtin_subc:
1489   case Builtin::BI__builtin_subcl:
1490   case Builtin::BI__builtin_subcll: {
1491 
1492     // We translate all of these builtins from expressions of the form:
1493     //   int x = ..., y = ..., carryin = ..., carryout, result;
1494     //   result = __builtin_addc(x, y, carryin, &carryout);
1495     //
1496     // to LLVM IR of the form:
1497     //
1498     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1499     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1500     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1501     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1502     //                                                       i32 %carryin)
1503     //   %result = extractvalue {i32, i1} %tmp2, 0
1504     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1505     //   %tmp3 = or i1 %carry1, %carry2
1506     //   %tmp4 = zext i1 %tmp3 to i32
1507     //   store i32 %tmp4, i32* %carryout
1508 
1509     // Scalarize our inputs.
1510     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1511     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1512     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1513     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1514 
1515     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1516     llvm::Intrinsic::ID IntrinsicId;
1517     switch (BuiltinID) {
1518     default: llvm_unreachable("Unknown multiprecision builtin id.");
1519     case Builtin::BI__builtin_addcb:
1520     case Builtin::BI__builtin_addcs:
1521     case Builtin::BI__builtin_addc:
1522     case Builtin::BI__builtin_addcl:
1523     case Builtin::BI__builtin_addcll:
1524       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1525       break;
1526     case Builtin::BI__builtin_subcb:
1527     case Builtin::BI__builtin_subcs:
1528     case Builtin::BI__builtin_subc:
1529     case Builtin::BI__builtin_subcl:
1530     case Builtin::BI__builtin_subcll:
1531       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1532       break;
1533     }
1534 
1535     // Construct our resulting LLVM IR expression.
1536     llvm::Value *Carry1;
1537     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1538                                               X, Y, Carry1);
1539     llvm::Value *Carry2;
1540     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1541                                               Sum1, Carryin, Carry2);
1542     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1543                                                X->getType());
1544     Builder.CreateStore(CarryOut, CarryOutPtr);
1545     return RValue::get(Sum2);
1546   }
1547   case Builtin::BI__builtin_uadd_overflow:
1548   case Builtin::BI__builtin_uaddl_overflow:
1549   case Builtin::BI__builtin_uaddll_overflow:
1550   case Builtin::BI__builtin_usub_overflow:
1551   case Builtin::BI__builtin_usubl_overflow:
1552   case Builtin::BI__builtin_usubll_overflow:
1553   case Builtin::BI__builtin_umul_overflow:
1554   case Builtin::BI__builtin_umull_overflow:
1555   case Builtin::BI__builtin_umulll_overflow:
1556   case Builtin::BI__builtin_sadd_overflow:
1557   case Builtin::BI__builtin_saddl_overflow:
1558   case Builtin::BI__builtin_saddll_overflow:
1559   case Builtin::BI__builtin_ssub_overflow:
1560   case Builtin::BI__builtin_ssubl_overflow:
1561   case Builtin::BI__builtin_ssubll_overflow:
1562   case Builtin::BI__builtin_smul_overflow:
1563   case Builtin::BI__builtin_smull_overflow:
1564   case Builtin::BI__builtin_smulll_overflow: {
1565 
1566     // We translate all of these builtins directly to the relevant llvm IR node.
1567 
1568     // Scalarize our inputs.
1569     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1570     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1571     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1572 
1573     // Decide which of the overflow intrinsics we are lowering to:
1574     llvm::Intrinsic::ID IntrinsicId;
1575     switch (BuiltinID) {
1576     default: llvm_unreachable("Unknown security overflow builtin id.");
1577     case Builtin::BI__builtin_uadd_overflow:
1578     case Builtin::BI__builtin_uaddl_overflow:
1579     case Builtin::BI__builtin_uaddll_overflow:
1580       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1581       break;
1582     case Builtin::BI__builtin_usub_overflow:
1583     case Builtin::BI__builtin_usubl_overflow:
1584     case Builtin::BI__builtin_usubll_overflow:
1585       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1586       break;
1587     case Builtin::BI__builtin_umul_overflow:
1588     case Builtin::BI__builtin_umull_overflow:
1589     case Builtin::BI__builtin_umulll_overflow:
1590       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1591       break;
1592     case Builtin::BI__builtin_sadd_overflow:
1593     case Builtin::BI__builtin_saddl_overflow:
1594     case Builtin::BI__builtin_saddll_overflow:
1595       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1596       break;
1597     case Builtin::BI__builtin_ssub_overflow:
1598     case Builtin::BI__builtin_ssubl_overflow:
1599     case Builtin::BI__builtin_ssubll_overflow:
1600       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1601       break;
1602     case Builtin::BI__builtin_smul_overflow:
1603     case Builtin::BI__builtin_smull_overflow:
1604     case Builtin::BI__builtin_smulll_overflow:
1605       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1606       break;
1607     }
1608 
1609 
1610     llvm::Value *Carry;
1611     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1612     Builder.CreateStore(Sum, SumOutPtr);
1613 
1614     return RValue::get(Carry);
1615   }
1616   case Builtin::BI__builtin_addressof:
1617     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1618   case Builtin::BI__builtin_operator_new:
1619     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1620                                     E->getArg(0), false);
1621   case Builtin::BI__builtin_operator_delete:
1622     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1623                                     E->getArg(0), true);
1624   case Builtin::BI__noop:
1625     // __noop always evaluates to an integer literal zero.
1626     return RValue::get(ConstantInt::get(IntTy, 0));
1627   case Builtin::BI__builtin_call_with_static_chain: {
1628     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1629     const Expr *Chain = E->getArg(1);
1630     return EmitCall(Call->getCallee()->getType(),
1631                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1632                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1633   }
1634   case Builtin::BI_InterlockedExchange:
1635   case Builtin::BI_InterlockedExchangePointer:
1636     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1637   case Builtin::BI_InterlockedCompareExchangePointer: {
1638     llvm::Type *RTy;
1639     llvm::IntegerType *IntType =
1640       IntegerType::get(getLLVMContext(),
1641                        getContext().getTypeSize(E->getType()));
1642     llvm::Type *IntPtrType = IntType->getPointerTo();
1643 
1644     llvm::Value *Destination =
1645       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1646 
1647     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1648     RTy = Exchange->getType();
1649     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1650 
1651     llvm::Value *Comparand =
1652       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1653 
1654     auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1655                                               SequentiallyConsistent,
1656                                               SequentiallyConsistent);
1657     Result->setVolatile(true);
1658 
1659     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1660                                                                          0),
1661                                               RTy));
1662   }
1663   case Builtin::BI_InterlockedCompareExchange: {
1664     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1665         EmitScalarExpr(E->getArg(0)),
1666         EmitScalarExpr(E->getArg(2)),
1667         EmitScalarExpr(E->getArg(1)),
1668         SequentiallyConsistent,
1669         SequentiallyConsistent);
1670       CXI->setVolatile(true);
1671       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1672   }
1673   case Builtin::BI_InterlockedIncrement: {
1674     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1675       AtomicRMWInst::Add,
1676       EmitScalarExpr(E->getArg(0)),
1677       ConstantInt::get(Int32Ty, 1),
1678       llvm::SequentiallyConsistent);
1679     RMWI->setVolatile(true);
1680     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
1681   }
1682   case Builtin::BI_InterlockedDecrement: {
1683     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1684       AtomicRMWInst::Sub,
1685       EmitScalarExpr(E->getArg(0)),
1686       ConstantInt::get(Int32Ty, 1),
1687       llvm::SequentiallyConsistent);
1688     RMWI->setVolatile(true);
1689     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
1690   }
1691   case Builtin::BI_InterlockedExchangeAdd: {
1692     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1693       AtomicRMWInst::Add,
1694       EmitScalarExpr(E->getArg(0)),
1695       EmitScalarExpr(E->getArg(1)),
1696       llvm::SequentiallyConsistent);
1697     RMWI->setVolatile(true);
1698     return RValue::get(RMWI);
1699   }
1700   case Builtin::BI__readfsdword: {
1701     Value *IntToPtr =
1702       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1703                              llvm::PointerType::get(CGM.Int32Ty, 257));
1704     LoadInst *Load =
1705         Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
1706     return RValue::get(Load);
1707   }
1708 
1709   case Builtin::BI__exception_code:
1710   case Builtin::BI_exception_code:
1711     return RValue::get(EmitSEHExceptionCode());
1712   case Builtin::BI__exception_info:
1713   case Builtin::BI_exception_info:
1714     return RValue::get(EmitSEHExceptionInfo());
1715   case Builtin::BI__abnormal_termination:
1716   case Builtin::BI_abnormal_termination:
1717     return RValue::get(EmitSEHAbnormalTermination());
1718   case Builtin::BI_setjmpex: {
1719     if (getTarget().getTriple().isOSMSVCRT()) {
1720       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1721       llvm::AttributeSet ReturnsTwiceAttr =
1722           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1723                             llvm::Attribute::ReturnsTwice);
1724       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
1725           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1726           "_setjmpex", ReturnsTwiceAttr);
1727       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1728           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1729       llvm::Value *FrameAddr =
1730           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1731                              ConstantInt::get(Int32Ty, 0));
1732       llvm::Value *Args[] = {Buf, FrameAddr};
1733       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
1734       CS.setAttributes(ReturnsTwiceAttr);
1735       return RValue::get(CS.getInstruction());
1736     }
1737     break;
1738   }
1739   case Builtin::BI_setjmp: {
1740     if (getTarget().getTriple().isOSMSVCRT()) {
1741       llvm::AttributeSet ReturnsTwiceAttr =
1742           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1743                             llvm::Attribute::ReturnsTwice);
1744       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1745           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1746       llvm::CallSite CS;
1747       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
1748         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
1749         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
1750             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
1751             "_setjmp3", ReturnsTwiceAttr);
1752         llvm::Value *Count = ConstantInt::get(IntTy, 0);
1753         llvm::Value *Args[] = {Buf, Count};
1754         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
1755       } else {
1756         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1757         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
1758             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1759             "_setjmp", ReturnsTwiceAttr);
1760         llvm::Value *FrameAddr =
1761             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1762                                ConstantInt::get(Int32Ty, 0));
1763         llvm::Value *Args[] = {Buf, FrameAddr};
1764         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
1765       }
1766       CS.setAttributes(ReturnsTwiceAttr);
1767       return RValue::get(CS.getInstruction());
1768     }
1769     break;
1770   }
1771 
1772   case Builtin::BI__GetExceptionInfo: {
1773     if (llvm::GlobalVariable *GV =
1774             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
1775       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
1776     break;
1777   }
1778   }
1779 
1780   // If this is an alias for a lib function (e.g. __builtin_sin), emit
1781   // the call using the normal call path, but using the unmangled
1782   // version of the function name.
1783   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1784     return emitLibraryCall(*this, FD, E,
1785                            CGM.getBuiltinLibFunction(FD, BuiltinID));
1786 
1787   // If this is a predefined lib function (e.g. malloc), emit the call
1788   // using exactly the normal call path.
1789   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1790     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1791 
1792   // See if we have a target specific intrinsic.
1793   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
1794   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1795   if (const char *Prefix =
1796           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
1797     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1798     // NOTE we dont need to perform a compatibility flag check here since the
1799     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
1800     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
1801     if (IntrinsicID == Intrinsic::not_intrinsic)
1802       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
1803   }
1804 
1805   if (IntrinsicID != Intrinsic::not_intrinsic) {
1806     SmallVector<Value*, 16> Args;
1807 
1808     // Find out if any arguments are required to be integer constant
1809     // expressions.
1810     unsigned ICEArguments = 0;
1811     ASTContext::GetBuiltinTypeError Error;
1812     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1813     assert(Error == ASTContext::GE_None && "Should not codegen an error");
1814 
1815     Function *F = CGM.getIntrinsic(IntrinsicID);
1816     llvm::FunctionType *FTy = F->getFunctionType();
1817 
1818     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1819       Value *ArgValue;
1820       // If this is a normal argument, just emit it as a scalar.
1821       if ((ICEArguments & (1 << i)) == 0) {
1822         ArgValue = EmitScalarExpr(E->getArg(i));
1823       } else {
1824         // If this is required to be a constant, constant fold it so that we
1825         // know that the generated intrinsic gets a ConstantInt.
1826         llvm::APSInt Result;
1827         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
1828         assert(IsConst && "Constant arg isn't actually constant?");
1829         (void)IsConst;
1830         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1831       }
1832 
1833       // If the intrinsic arg type is different from the builtin arg type
1834       // we need to do a bit cast.
1835       llvm::Type *PTy = FTy->getParamType(i);
1836       if (PTy != ArgValue->getType()) {
1837         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1838                "Must be able to losslessly bit cast to param");
1839         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1840       }
1841 
1842       Args.push_back(ArgValue);
1843     }
1844 
1845     Value *V = Builder.CreateCall(F, Args);
1846     QualType BuiltinRetType = E->getType();
1847 
1848     llvm::Type *RetTy = VoidTy;
1849     if (!BuiltinRetType->isVoidType())
1850       RetTy = ConvertType(BuiltinRetType);
1851 
1852     if (RetTy != V->getType()) {
1853       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1854              "Must be able to losslessly bit cast result type");
1855       V = Builder.CreateBitCast(V, RetTy);
1856     }
1857 
1858     return RValue::get(V);
1859   }
1860 
1861   // See if we have a target specific builtin that needs to be lowered.
1862   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1863     return RValue::get(V);
1864 
1865   ErrorUnsupported(E, "builtin function");
1866 
1867   // Unknown builtin, for now just dump it out and return undef.
1868   return GetUndefRValue(E->getType());
1869 }
1870 
1871 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
1872                                         unsigned BuiltinID, const CallExpr *E,
1873                                         llvm::Triple::ArchType Arch) {
1874   switch (Arch) {
1875   case llvm::Triple::arm:
1876   case llvm::Triple::armeb:
1877   case llvm::Triple::thumb:
1878   case llvm::Triple::thumbeb:
1879     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
1880   case llvm::Triple::aarch64:
1881   case llvm::Triple::aarch64_be:
1882     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
1883   case llvm::Triple::x86:
1884   case llvm::Triple::x86_64:
1885     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
1886   case llvm::Triple::ppc:
1887   case llvm::Triple::ppc64:
1888   case llvm::Triple::ppc64le:
1889     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
1890   case llvm::Triple::r600:
1891   case llvm::Triple::amdgcn:
1892     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
1893   case llvm::Triple::systemz:
1894     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
1895   case llvm::Triple::nvptx:
1896   case llvm::Triple::nvptx64:
1897     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
1898   case llvm::Triple::wasm32:
1899   case llvm::Triple::wasm64:
1900     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
1901   default:
1902     return nullptr;
1903   }
1904 }
1905 
1906 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1907                                               const CallExpr *E) {
1908   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
1909     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
1910     return EmitTargetArchBuiltinExpr(
1911         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
1912         getContext().getAuxTargetInfo()->getTriple().getArch());
1913   }
1914 
1915   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
1916                                    getTarget().getTriple().getArch());
1917 }
1918 
1919 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1920                                      NeonTypeFlags TypeFlags,
1921                                      bool V1Ty=false) {
1922   int IsQuad = TypeFlags.isQuad();
1923   switch (TypeFlags.getEltType()) {
1924   case NeonTypeFlags::Int8:
1925   case NeonTypeFlags::Poly8:
1926     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
1927   case NeonTypeFlags::Int16:
1928   case NeonTypeFlags::Poly16:
1929   case NeonTypeFlags::Float16:
1930     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
1931   case NeonTypeFlags::Int32:
1932     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
1933   case NeonTypeFlags::Int64:
1934   case NeonTypeFlags::Poly64:
1935     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
1936   case NeonTypeFlags::Poly128:
1937     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
1938     // There is a lot of i128 and f128 API missing.
1939     // so we use v16i8 to represent poly128 and get pattern matched.
1940     return llvm::VectorType::get(CGF->Int8Ty, 16);
1941   case NeonTypeFlags::Float32:
1942     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
1943   case NeonTypeFlags::Float64:
1944     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
1945   }
1946   llvm_unreachable("Unknown vector element type!");
1947 }
1948 
1949 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
1950                                           NeonTypeFlags IntTypeFlags) {
1951   int IsQuad = IntTypeFlags.isQuad();
1952   switch (IntTypeFlags.getEltType()) {
1953   case NeonTypeFlags::Int32:
1954     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
1955   case NeonTypeFlags::Int64:
1956     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
1957   default:
1958     llvm_unreachable("Type can't be converted to floating-point!");
1959   }
1960 }
1961 
1962 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1963   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1964   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1965   return Builder.CreateShuffleVector(V, V, SV, "lane");
1966 }
1967 
1968 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1969                                      const char *name,
1970                                      unsigned shift, bool rightshift) {
1971   unsigned j = 0;
1972   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1973        ai != ae; ++ai, ++j)
1974     if (shift > 0 && shift == j)
1975       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1976     else
1977       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1978 
1979   return Builder.CreateCall(F, Ops, name);
1980 }
1981 
1982 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1983                                             bool neg) {
1984   int SV = cast<ConstantInt>(V)->getSExtValue();
1985   return ConstantInt::get(Ty, neg ? -SV : SV);
1986 }
1987 
1988 // \brief Right-shift a vector by a constant.
1989 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
1990                                           llvm::Type *Ty, bool usgn,
1991                                           const char *name) {
1992   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1993 
1994   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
1995   int EltSize = VTy->getScalarSizeInBits();
1996 
1997   Vec = Builder.CreateBitCast(Vec, Ty);
1998 
1999   // lshr/ashr are undefined when the shift amount is equal to the vector
2000   // element size.
2001   if (ShiftAmt == EltSize) {
2002     if (usgn) {
2003       // Right-shifting an unsigned value by its size yields 0.
2004       return llvm::ConstantAggregateZero::get(VTy);
2005     } else {
2006       // Right-shifting a signed value by its size is equivalent
2007       // to a shift of size-1.
2008       --ShiftAmt;
2009       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2010     }
2011   }
2012 
2013   Shift = EmitNeonShiftVector(Shift, Ty, false);
2014   if (usgn)
2015     return Builder.CreateLShr(Vec, Shift, name);
2016   else
2017     return Builder.CreateAShr(Vec, Shift, name);
2018 }
2019 
// Bit flags combined into the TypeModifier field of the NEON intrinsic map
// entries. Each primitive flag occupies its own bit; the names at the bottom
// are shorthands for commonly used combinations.
enum {
  // Primitive flags, one bit each.
  AddRetType        = 0x001,
  Add1ArgType       = 0x002,
  Add2ArgTypes      = 0x004,

  VectorizeRetType  = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType   = 0x020,
  UnsignedAlts      = 0x040,

  Use64BitVectors   = 0x080,
  Use128BitVectors  = 0x100,

  // Combinations of the primitive flags above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 =
      VectorizeRetType | VectorizeArgTypes | AddRetType | Add2ArgTypes,
  FpCmpzModifiers =
      InventFloatType | VectorizeRetType | AddRetType | Add1ArgType
};
2041 
/// One row of a NEON builtin-to-intrinsic table.
struct NeonIntrinsicInfo {
  /// Builtin this row describes; also the key used by operator<.
  unsigned BuiltinID;
  /// Primary LLVM intrinsic ID (0 when the row names none).
  unsigned LLVMIntrinsic;
  /// Alternate LLVM intrinsic ID (0 when the row names none).
  unsigned AltLLVMIntrinsic;
  /// Name string associated with the row.
  const char *NameHint;
  /// Bitwise OR of type-modifier flags.
  unsigned TypeModifier;

  /// Order a row against a bare builtin ID, comparing by BuiltinID only.
  bool operator<(unsigned RHSBuiltinID) const {
    return RHSBuiltinID > BuiltinID;
  }
};
2053 
// Table-row initializers for NeonIntrinsicInfo. Each expands to a braced list
// in field order: builtin ID, intrinsic, alternate intrinsic, name hint, type
// modifier. The builtin ID is formed by pasting NameBase onto
// NEON::BI__builtin_neon_, and the name hint is NameBase stringified.

// Row with no LLVM intrinsic and no type modifier.
#define NEONMAP0(NameBase)                                                     \
  { NEON::BI__builtin_neon_##NameBase, 0, 0, #NameBase, 0 }

// Row with a single LLVM intrinsic.
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)                        \
  { NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic, 0,           \
    #NameBase, TypeModifier }

// Row with a primary and an alternate LLVM intrinsic.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)      \
  { NEON::BI__builtin_neon_##NameBase, Intrinsic::LLVMIntrinsic,              \
    Intrinsic::AltLLVMIntrinsic, #NameBase, TypeModifier }
2065 
2066 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2067   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2068   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2069   NEONMAP1(vabs_v, arm_neon_vabs, 0),
2070   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2071   NEONMAP0(vaddhn_v),
2072   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2073   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2074   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2075   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2076   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2077   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2078   NEONMAP1(vcage_v, arm_neon_vacge, 0),
2079   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2080   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2081   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2082   NEONMAP1(vcale_v, arm_neon_vacge, 0),
2083   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2084   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2085   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2086   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2087   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2088   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2089   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2090   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2091   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2092   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2093   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2094   NEONMAP0(vcvt_f32_v),
2095   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2096   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2097   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2098   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2099   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2100   NEONMAP0(vcvt_s32_v),
2101   NEONMAP0(vcvt_s64_v),
2102   NEONMAP0(vcvt_u32_v),
2103   NEONMAP0(vcvt_u64_v),
2104   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2105   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2106   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2107   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2108   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2109   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2110   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2111   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2112   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2113   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2114   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2115   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2116   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2117   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2118   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2119   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2120   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2121   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2122   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2123   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2124   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2125   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2126   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2127   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2128   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2129   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2130   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2131   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2132   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2133   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2134   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2135   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2136   NEONMAP0(vcvtq_f32_v),
2137   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2138   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2139   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2140   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2141   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2142   NEONMAP0(vcvtq_s32_v),
2143   NEONMAP0(vcvtq_s64_v),
2144   NEONMAP0(vcvtq_u32_v),
2145   NEONMAP0(vcvtq_u64_v),
2146   NEONMAP0(vext_v),
2147   NEONMAP0(vextq_v),
2148   NEONMAP0(vfma_v),
2149   NEONMAP0(vfmaq_v),
2150   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2151   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2152   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2153   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2154   NEONMAP0(vld1_dup_v),
2155   NEONMAP1(vld1_v, arm_neon_vld1, 0),
2156   NEONMAP0(vld1q_dup_v),
2157   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2158   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2159   NEONMAP1(vld2_v, arm_neon_vld2, 0),
2160   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2161   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2162   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2163   NEONMAP1(vld3_v, arm_neon_vld3, 0),
2164   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2165   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2166   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2167   NEONMAP1(vld4_v, arm_neon_vld4, 0),
2168   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2169   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2170   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2171   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2172   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2173   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2174   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2175   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2176   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2177   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2178   NEONMAP0(vmovl_v),
2179   NEONMAP0(vmovn_v),
2180   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2181   NEONMAP0(vmull_v),
2182   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2183   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2184   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2185   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2186   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2187   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2188   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2189   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2190   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2191   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2192   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2193   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2194   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2195   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2196   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2197   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2198   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2199   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2200   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2201   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2202   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2203   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2204   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2205   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2206   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2207   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2208   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2209   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2210   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2211   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2212   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2213   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2214   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2215   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2216   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2217   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2218   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2219   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2220   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2221   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2222   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2223   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2224   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2225   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2226   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2227   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2228   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2229   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2230   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2231   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2232   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2233   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2234   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2235   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2236   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2237   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2238   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2239   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2240   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2241   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2242   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2243   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2244   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2245   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2246   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2247   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2248   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2249   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2250   NEONMAP0(vshl_n_v),
2251   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2252   NEONMAP0(vshll_n_v),
2253   NEONMAP0(vshlq_n_v),
2254   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2255   NEONMAP0(vshr_n_v),
2256   NEONMAP0(vshrn_n_v),
2257   NEONMAP0(vshrq_n_v),
2258   NEONMAP1(vst1_v, arm_neon_vst1, 0),
2259   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2260   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2261   NEONMAP1(vst2_v, arm_neon_vst2, 0),
2262   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2263   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2264   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2265   NEONMAP1(vst3_v, arm_neon_vst3, 0),
2266   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2267   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2268   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2269   NEONMAP1(vst4_v, arm_neon_vst4, 0),
2270   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2271   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2272   NEONMAP0(vsubhn_v),
2273   NEONMAP0(vtrn_v),
2274   NEONMAP0(vtrnq_v),
2275   NEONMAP0(vtst_v),
2276   NEONMAP0(vtstq_v),
2277   NEONMAP0(vuzp_v),
2278   NEONMAP0(vuzpq_v),
2279   NEONMAP0(vzip_v),
2280   NEONMAP0(vzipq_v)
2281 };
2282 
/// Table associating AArch64 vector (SIMD) NEON builtins with LLVM intrinsics.
///
/// Each row is written with one of the NEONMAPn helper macros: NEONMAP0 names
/// only the builtin, NEONMAP1 adds one intrinsic name plus a bitmask of type
/// modifier flags (e.g. Add1ArgType, AddRetType, UnsignedAlts; 0 for none),
/// and NEONMAP2 adds two intrinsic names plus the same kind of bitmask.
///
/// NOTE(review): rows look like they are kept in ascending builtin order so
/// the table can be binary-searched by findNeonIntrinsicInMap, which asserts
/// sortedness in asserts-enabled builds -- confirm against the generated
/// BuiltinID order before inserting a new row.
static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
  NEONMAP0(vaddhn_v),
  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
  NEONMAP1(vclz_v, ctlz, Add1ArgType),
  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
  NEONMAP0(vcvt_f32_v),
  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP0(vcvtq_f32_v),
  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
  NEONMAP0(vext_v),
  NEONMAP0(vextq_v),
  NEONMAP0(vfma_v),
  NEONMAP0(vfmaq_v),
  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
  NEONMAP0(vmovl_v),
  NEONMAP0(vmovn_v),
  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
  NEONMAP0(vshl_n_v),
  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshll_n_v),
  NEONMAP0(vshlq_n_v),
  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
  NEONMAP0(vshr_n_v),
  NEONMAP0(vshrn_n_v),
  NEONMAP0(vshrq_n_v),
  NEONMAP0(vsubhn_v),
  NEONMAP0(vtst_v),
  NEONMAP0(vtstq_v),
};
2397 
/// Table associating AArch64 SISD NEON builtins (the type-suffixed forms such
/// as vqaddd_s64 or vaddv_f32) with LLVM intrinsics.
///
/// Every row uses NEONMAP1: builtin name, intrinsic name, and a bitmask of
/// type modifier flags (0 for none). The flags are the ones tested by
/// LookupNeonLLVMIntrinsic when it builds the intrinsic's overload type list
/// (AddRetType, Add1ArgType, the Vectorize* flags, Use64BitVectors /
/// Use128BitVectors).
///
/// NOTE(review): rows look like they are kept in ascending builtin order so
/// the table can be binary-searched by findNeonIntrinsicInMap, which asserts
/// sortedness in asserts-enabled builds -- confirm against the generated
/// BuiltinID order before inserting a new row.
static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  // The vcale*/vcalt* rows reuse the facge/facgt intrinsics;
  // EmitCommonNeonSISDBuiltinExpr swaps the two operands for these builtins.
  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  // NOTE(review): the signed vpaddd_s64 row uses uaddv, same as the unsigned
  // row below -- presumably deliberate because the addition is
  // sign-agnostic; confirm.
  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
};
2592 
// The NEONMAPn helpers are only needed while spelling out the intrinsic
// tables above; drop them so they do not leak into the rest of the file.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

// One flag per intrinsic table. findNeonIntrinsicInMap takes such a flag by
// reference and sets it once its asserts-only sortedness check has passed, so
// the check is not repeated on later lookups.
// NOTE(review): presumably each flag is paired with the table of the same
// name at the call sites -- confirm there.
static bool NEONSIMDIntrinsicsProvenSorted = false;

static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
2601 
2602 
2603 static const NeonIntrinsicInfo *
2604 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
2605                        unsigned BuiltinID, bool &MapProvenSorted) {
2606 
2607 #ifndef NDEBUG
2608   if (!MapProvenSorted) {
2609     // FIXME: use std::is_sorted once C++11 is allowed
2610     for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i)
2611       assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID);
2612     MapProvenSorted = true;
2613   }
2614 #endif
2615 
2616   const NeonIntrinsicInfo *Builtin =
2617       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
2618 
2619   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
2620     return Builtin;
2621 
2622   return nullptr;
2623 }
2624 
2625 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
2626                                                    unsigned Modifier,
2627                                                    llvm::Type *ArgType,
2628                                                    const CallExpr *E) {
2629   int VectorSize = 0;
2630   if (Modifier & Use64BitVectors)
2631     VectorSize = 64;
2632   else if (Modifier & Use128BitVectors)
2633     VectorSize = 128;
2634 
2635   // Return type.
2636   SmallVector<llvm::Type *, 3> Tys;
2637   if (Modifier & AddRetType) {
2638     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
2639     if (Modifier & VectorizeRetType)
2640       Ty = llvm::VectorType::get(
2641           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
2642 
2643     Tys.push_back(Ty);
2644   }
2645 
2646   // Arguments.
2647   if (Modifier & VectorizeArgTypes) {
2648     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
2649     ArgType = llvm::VectorType::get(ArgType, Elts);
2650   }
2651 
2652   if (Modifier & (Add1ArgType | Add2ArgTypes))
2653     Tys.push_back(ArgType);
2654 
2655   if (Modifier & Add2ArgTypes)
2656     Tys.push_back(ArgType);
2657 
2658   if (Modifier & InventFloatType)
2659     Tys.push_back(FloatTy);
2660 
2661   return CGM.getIntrinsic(IntrinsicID, Tys);
2662 }
2663 
2664 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
2665                                             const NeonIntrinsicInfo &SISDInfo,
2666                                             SmallVectorImpl<Value *> &Ops,
2667                                             const CallExpr *E) {
2668   unsigned BuiltinID = SISDInfo.BuiltinID;
2669   unsigned int Int = SISDInfo.LLVMIntrinsic;
2670   unsigned Modifier = SISDInfo.TypeModifier;
2671   const char *s = SISDInfo.NameHint;
2672 
2673   switch (BuiltinID) {
2674   case NEON::BI__builtin_neon_vcled_s64:
2675   case NEON::BI__builtin_neon_vcled_u64:
2676   case NEON::BI__builtin_neon_vcles_f32:
2677   case NEON::BI__builtin_neon_vcled_f64:
2678   case NEON::BI__builtin_neon_vcltd_s64:
2679   case NEON::BI__builtin_neon_vcltd_u64:
2680   case NEON::BI__builtin_neon_vclts_f32:
2681   case NEON::BI__builtin_neon_vcltd_f64:
2682   case NEON::BI__builtin_neon_vcales_f32:
2683   case NEON::BI__builtin_neon_vcaled_f64:
2684   case NEON::BI__builtin_neon_vcalts_f32:
2685   case NEON::BI__builtin_neon_vcaltd_f64:
2686     // Only one direction of comparisons actually exist, cmle is actually a cmge
2687     // with swapped operands. The table gives us the right intrinsic but we
2688     // still need to do the swap.
2689     std::swap(Ops[0], Ops[1]);
2690     break;
2691   }
2692 
2693   assert(Int && "Generic code assumes a valid intrinsic");
2694 
2695   // Determine the type(s) of this overloaded AArch64 intrinsic.
2696   const Expr *Arg = E->getArg(0);
2697   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
2698   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
2699 
2700   int j = 0;
2701   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
2702   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2703        ai != ae; ++ai, ++j) {
2704     llvm::Type *ArgTy = ai->getType();
2705     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
2706              ArgTy->getPrimitiveSizeInBits())
2707       continue;
2708 
2709     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
2710     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
2711     // it before inserting.
2712     Ops[j] =
2713         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
2714     Ops[j] =
2715         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
2716   }
2717 
2718   Value *Result = CGF.EmitNeonCall(F, Ops, s);
2719   llvm::Type *ResultType = CGF.ConvertType(E->getType());
2720   if (ResultType->getPrimitiveSizeInBits() <
2721       Result->getType()->getPrimitiveSizeInBits())
2722     return CGF.Builder.CreateExtractElement(Result, C0);
2723 
2724   return CGF.Builder.CreateBitCast(Result, ResultType, s);
2725 }
2726 
/// Emit IR for a NEON builtin whose last call argument is an integer constant
/// encoding the NeonTypeFlags of the overloaded vector type.
///
/// Builtins that need bespoke lowering are handled case by case in the switch
/// below and return directly. Every other builtin leaves the switch and is
/// emitted as a call to the intrinsic found via LookupNeonLLVMIntrinsic, with
/// the result bitcast to the builtin's result type.
///
/// \param BuiltinID         The NEON builtin being emitted.
/// \param LLVMIntrinsic     Primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic  Alternate intrinsic ID. It replaces LLVMIntrinsic
///                          when \p Modifier has UnsignedAlts set and the
///                          vector type is not unsigned; several cases also
///                          choose between the two explicitly.
/// \param NameHint          Name given to the emitted call/value in the cases
///                          that pass it on.
/// \param Modifier          Type-modifier flags (UnsignedAlts is tested here;
///                          the full set is forwarded to
///                          LookupNeonLLVMIntrinsic).
/// \param E                 The builtin call expression.
/// \param Ops               Already-emitted operands; modified in place.
/// \param PtrOp0, PtrOp1    Addresses whose alignment is appended as an i32
///                          operand by the load/store cases; PtrOp0 is also
///                          the address loaded from by vld1_dup.
/// \returns The emitted value, or nullptr if the trailing argument is not an
///          integer constant expression or GetNeonType returns no type.
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materializes the alignment of an address as an i32 constant.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Pick the alternate intrinsic for non-unsigned types when the table entry
  // asks for it via UnsignedAlts.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    // Floating-point vectors use the generic fabs intrinsic; everything else
    // uses the intrinsic from the table.
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
    // Intentional fall through: with the operands swapped, vcale/vcalt share
    // the vcage/vcagt lowering below.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on the result type (VTy) and on a
    // floating-point vector type with the same element width and count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    // Integer-to-float conversion, emitted as plain uitofp/sitofp to the
    // float32 vector type of matching quad-ness.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    // Here the primary intrinsic is used for unsigned types and the alternate
    // one otherwise.
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Float-to-integer conversion, emitted as plain fptoui/fptosi.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    // All of these map to the table's intrinsic, overloaded on the integer
    // result type and the floating-point source type.
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shufflevector of Ops[0] and Ops[1] whose mask is
    // CV, CV+1, ..., CV+NumElements-1, where CV is the constant in Ops[2].
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    // The intrinsic takes the alignment of the address as a trailing operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the load intrinsic on Ops[1] (plus PtrOp1's alignment) and store
    // its result through Ops[0], cast to a pointer to the result type.
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element from PtrOp0, insert it into lane 0 of an undef vector
    // and splat that lane.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    // Cast every operand from index 2 up to, but not including, the last one
    // to the vector type; the last operand is left as is.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination for the result, so the intrinsic receives
    // Ops[1..] only.
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    // Widen from the truncated-element vector type: zext when unsigned, sext
    // otherwise.
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    // Narrow from the extended-element vector type with a plain trunc.
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two calls: first LLVMIntrinsic on the operands after Ops[0], then
    // AltLLVMIntrinsic on Ops[0] and that result.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point types use the primary intrinsic, others the alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    // Emitted as a plain shl by the shift vector built from Ops[1].
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Extend the source to VTy (zext when unsigned, sext otherwise), then
    // shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift right in the extended-element type (lshr when unsigned, ashr
    // otherwise), then truncate to the result type.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v:
    // All stores append PtrOp0's alignment as the final operand.
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] points at storage for two result vectors; Ops[1] and Ops[2] are
    // the inputs. Each iteration shuffles them and stores into slot vi.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      // Mask pairs (i+vi, i+e+vi) for even i: matching lanes of both inputs.
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store instruction.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (Ops[0] & Ops[1]) != 0 per lane, sign-extended to the result type.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-two-results scheme as vtrn, with mask 2*i+vi: the even
    // (vi == 0) or odd (vi == 1) lanes of the concatenated inputs.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-two-results scheme as vtrn. The mask interleaves lane k of
    // the first input with lane k of the second (index k+e), taking k from
    // the low half for vi == 0 and from the high half for vi == 1.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path: reached by the default case and by cases that only adjusted
  // Ops (e.g. vclz) before breaking out of the switch.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
3150 
3151 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3152     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3153     const CmpInst::Predicate Ip, const Twine &Name) {
3154   llvm::Type *OTy = Op->getType();
3155 
3156   // FIXME: this is utterly horrific. We should not be looking at previous
3157   // codegen context to find out what needs doing. Unfortunately TableGen
3158   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3159   // (etc).
3160   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3161     OTy = BI->getOperand(0)->getType();
3162 
3163   Op = Builder.CreateBitCast(Op, OTy);
3164   if (OTy->getScalarType()->isFloatingPointTy()) {
3165     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3166   } else {
3167     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3168   }
3169   return Builder.CreateSExt(Op, Ty, Name);
3170 }
3171 
3172 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3173                                  Value *ExtOp, Value *IndexOp,
3174                                  llvm::Type *ResTy, unsigned IntID,
3175                                  const char *Name) {
3176   SmallVector<Value *, 2> TblOps;
3177   if (ExtOp)
3178     TblOps.push_back(ExtOp);
3179 
3180   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3181   SmallVector<Constant*, 16> Indices;
3182   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3183   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3184     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
3185     Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
3186   }
3187   Value *SV = llvm::ConstantVector::get(Indices);
3188 
3189   int PairPos = 0, End = Ops.size() - 1;
3190   while (PairPos < End) {
3191     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3192                                                      Ops[PairPos+1], SV, Name));
3193     PairPos += 2;
3194   }
3195 
3196   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3197   // of the 128-bit lookup table with zero.
3198   if (PairPos == End) {
3199     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3200     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3201                                                      ZeroTbl, SV, Name));
3202   }
3203 
3204   Function *TblF;
3205   TblOps.push_back(IndexOp);
3206   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3207 
3208   return CGF.EmitNeonCall(TblF, TblOps, Name);
3209 }
3210 
3211 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3212   unsigned Value;
3213   switch (BuiltinID) {
3214   default:
3215     return nullptr;
3216   case ARM::BI__builtin_arm_nop:
3217     Value = 0;
3218     break;
3219   case ARM::BI__builtin_arm_yield:
3220   case ARM::BI__yield:
3221     Value = 1;
3222     break;
3223   case ARM::BI__builtin_arm_wfe:
3224   case ARM::BI__wfe:
3225     Value = 2;
3226     break;
3227   case ARM::BI__builtin_arm_wfi:
3228   case ARM::BI__wfi:
3229     Value = 3;
3230     break;
3231   case ARM::BI__builtin_arm_sev:
3232   case ARM::BI__sev:
3233     Value = 4;
3234     break;
3235   case ARM::BI__builtin_arm_sevl:
3236   case ARM::BI__sevl:
3237     Value = 5;
3238     break;
3239   }
3240 
3241   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3242                             llvm::ConstantInt::get(Int32Ty, Value));
3243 }
3244 
3245 // Generates the IR for the read/write special register builtin,
3246 // ValueType is the type of the value that is to be written or read,
3247 // RegisterType is the type of the register being written to or read from.
3248 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3249                                          const CallExpr *E,
3250                                          llvm::Type *RegisterType,
3251                                          llvm::Type *ValueType, bool IsRead) {
3252   // write and register intrinsics only support 32 and 64 bit operations.
3253   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3254           && "Unsupported size for register.");
3255 
3256   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3257   CodeGen::CodeGenModule &CGM = CGF.CGM;
3258   LLVMContext &Context = CGM.getLLVMContext();
3259 
3260   const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3261   StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3262 
3263   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3264   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3265   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3266 
3267   llvm::Type *Types[] = { RegisterType };
3268 
3269   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3270   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3271             && "Can't fit 64-bit value in 32-bit register");
3272 
3273   if (IsRead) {
3274     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3275     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3276 
3277     if (MixedTypes)
3278       // Read into 64 bit register and then truncate result to 32 bit.
3279       return Builder.CreateTrunc(Call, ValueType);
3280 
3281     if (ValueType->isPointerTy())
3282       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3283       return Builder.CreateIntToPtr(Call, ValueType);
3284 
3285     return Call;
3286   }
3287 
3288   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3289   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3290   if (MixedTypes) {
3291     // Extend 32 bit write value to 64 bit to pass to write.
3292     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3293     return Builder.CreateCall(F, { Metadata, ArgValue });
3294   }
3295 
3296   if (ValueType->isPointerTy()) {
3297     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3298     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3299     return Builder.CreateCall(F, { Metadata, ArgValue });
3300   }
3301 
3302   return Builder.CreateCall(F, { Metadata, ArgValue });
3303 }
3304 
3305 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3306 /// argument that specifies the vector type.
3307 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3308   switch (BuiltinID) {
3309   default: break;
3310   case NEON::BI__builtin_neon_vget_lane_i8:
3311   case NEON::BI__builtin_neon_vget_lane_i16:
3312   case NEON::BI__builtin_neon_vget_lane_i32:
3313   case NEON::BI__builtin_neon_vget_lane_i64:
3314   case NEON::BI__builtin_neon_vget_lane_f32:
3315   case NEON::BI__builtin_neon_vgetq_lane_i8:
3316   case NEON::BI__builtin_neon_vgetq_lane_i16:
3317   case NEON::BI__builtin_neon_vgetq_lane_i32:
3318   case NEON::BI__builtin_neon_vgetq_lane_i64:
3319   case NEON::BI__builtin_neon_vgetq_lane_f32:
3320   case NEON::BI__builtin_neon_vset_lane_i8:
3321   case NEON::BI__builtin_neon_vset_lane_i16:
3322   case NEON::BI__builtin_neon_vset_lane_i32:
3323   case NEON::BI__builtin_neon_vset_lane_i64:
3324   case NEON::BI__builtin_neon_vset_lane_f32:
3325   case NEON::BI__builtin_neon_vsetq_lane_i8:
3326   case NEON::BI__builtin_neon_vsetq_lane_i16:
3327   case NEON::BI__builtin_neon_vsetq_lane_i32:
3328   case NEON::BI__builtin_neon_vsetq_lane_i64:
3329   case NEON::BI__builtin_neon_vsetq_lane_f32:
3330   case NEON::BI__builtin_neon_vsha1h_u32:
3331   case NEON::BI__builtin_neon_vsha1cq_u32:
3332   case NEON::BI__builtin_neon_vsha1pq_u32:
3333   case NEON::BI__builtin_neon_vsha1mq_u32:
3334   case ARM::BI_MoveToCoprocessor:
3335   case ARM::BI_MoveToCoprocessor2:
3336     return false;
3337   }
3338   return true;
3339 }
3340 
/// EmitARMBuiltinExpr - Emit LLVM IR for a call to the ARM builtin BuiltinID,
/// taking operands from the arguments of E.  A number of builtins are matched
/// one at a time first; the remaining ones share an operand list (Ops) built
/// from the call arguments.  Returns nullptr when the builtin is not handled
/// here, when its trailing type argument is not an integer constant, or when
/// GetNeonType yields no vector type.
3341 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3342                                            const CallExpr *E) {
       // Hint builtins: a non-null value from GetValueForARMHint is the result.
3343   if (auto Hint = GetValueForARMHint(BuiltinID))
3344     return Hint;
3345 
       // __emit: place the constant operand in the instruction stream through a
       // side-effecting inline-asm .inst directive (.inst.n with the value cut
       // to 16 bits when the triple arch is thumb, 32 bits otherwise).
3346   if (BuiltinID == ARM::BI__emit) {
3347     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3348     llvm::FunctionType *FTy =
3349         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3350 
3351     APSInt Value;
3352     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3353       llvm_unreachable("Sema will ensure that the parameter is constant");
3354 
3355     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3356 
3357     llvm::InlineAsm *Emit =
3358         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3359                                  /*SideEffects=*/true)
3360                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3361                                  /*SideEffects=*/true);
3362 
3363     return Builder.CreateCall(Emit);
3364   }
3365 
       // __builtin_arm_dbg: pass the option operand straight to arm_dbg.
3366   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3367     Value *Option = EmitScalarExpr(E->getArg(0));
3368     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3369   }
3370 
       // __builtin_arm_prefetch: the builtin supplies (address, rw, is-data); the
       // prefetch call below is built as (address, rw, locality, is-data).
3371   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3372     Value *Address = EmitScalarExpr(E->getArg(0));
3373     Value *RW      = EmitScalarExpr(E->getArg(1));
3374     Value *IsData  = EmitScalarExpr(E->getArg(2));
3375 
3376     // Locality is not supported on ARM target
         // so the constant 3 is always passed for it.
3377     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3378 
3379     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3380     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3381   }
3382 
       // __builtin_arm_rbit: a single call to arm_rbit on the scalar operand.
3383   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3384     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3385                                                EmitScalarExpr(E->getArg(0)),
3386                               "rbit");
3387   }
3388 
       // __clear_cache: emitted as a nounwind call to a runtime function that has
       // the name and the converted type of the callee declaration.
3389   if (BuiltinID == ARM::BI__clear_cache) {
3390     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
3391     const FunctionDecl *FD = E->getDirectCallee();
3392     Value *Ops[2];
3393     for (unsigned i = 0; i < 2; i++)
3394       Ops[i] = EmitScalarExpr(E->getArg(i));
3395     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
3396     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
3397     StringRef Name = FD->getName();
3398     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
3399   }
3400 
       // 64-bit exclusive loads: ldrexd and __ldrexd always, ldrex and ldaex only
       // when the result type is 64 bits wide.
3401   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
3402       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
3403         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
3404        getContext().getTypeSize(E->getType()) == 64) ||
3405       BuiltinID == ARM::BI__ldrexd) {
3406     Function *F;
3407 
3408     switch (BuiltinID) {
3409     default: llvm_unreachable("unexpected builtin");
3410     case ARM::BI__builtin_arm_ldaex:
3411       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
3412       break;
3413     case ARM::BI__builtin_arm_ldrexd:
3414     case ARM::BI__builtin_arm_ldrex:
3415     case ARM::BI__ldrexd:
3416       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
3417       break;
3418     }
3419 
3420     Value *LdPtr = EmitScalarExpr(E->getArg(0));
3421     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
3422                                     "ldrexd");
3423 
         // The call yields an aggregate.  Element 1 is zero-extended and shifted
         // left by 32 to form the upper half; element 0 is OR-ed in below it.
3424     Value *Val0 = Builder.CreateExtractValue(Val, 1);
3425     Value *Val1 = Builder.CreateExtractValue(Val, 0);
3426     Val0 = Builder.CreateZExt(Val0, Int64Ty);
3427     Val1 = Builder.CreateZExt(Val1, Int64Ty);
3428 
3429     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
3430     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
3431     Val = Builder.CreateOr(Val, Val1);
3432     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
3433   }
3434 
       // Remaining exclusive loads.  The intrinsic is overloaded on the pointer
       // type, so the address is first cast to a pointer to an integer that is as
       // wide as the result type.
3435   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
3436       BuiltinID == ARM::BI__builtin_arm_ldaex) {
3437     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
3438 
3439     QualType Ty = E->getType();
3440     llvm::Type *RealResTy = ConvertType(Ty);
3441     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
3442                                                   getContext().getTypeSize(Ty));
3443     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
3444 
3445     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
3446                                        ? Intrinsic::arm_ldaex
3447                                        : Intrinsic::arm_ldrex,
3448                                    LoadAddr->getType());
3449     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
3450 
         // Convert the loaded integer back to the result type of the builtin.
3451     if (RealResTy->isPointerTy())
3452       return Builder.CreateIntToPtr(Val, RealResTy);
3453     else {
3454       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
3455       return Builder.CreateBitCast(Val, RealResTy);
3456     }
3457   }
3458 
       // 64-bit exclusive stores: strexd always, strex and stlex only when the
       // stored value is 64 bits wide.
3459   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
3460       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
3461         BuiltinID == ARM::BI__builtin_arm_strex) &&
3462        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
3463     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3464                                        ? Intrinsic::arm_stlexd
3465                                        : Intrinsic::arm_strexd);
3466     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
3467 
         // Split the value in two by storing it to a temporary and reloading that
         // memory as a struct of two i32 fields.
3468     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
3469     Value *Val = EmitScalarExpr(E->getArg(0));
3470     Builder.CreateStore(Val, Tmp);
3471 
3472     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
3473     Val = Builder.CreateLoad(LdPtr);
3474 
3475     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
3476     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
3477     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
3478     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
3479   }
3480 
       // Remaining exclusive stores: the value is handed to the intrinsic as i32.
3481   if (BuiltinID == ARM::BI__builtin_arm_strex ||
3482       BuiltinID == ARM::BI__builtin_arm_stlex) {
3483     Value *StoreVal = EmitScalarExpr(E->getArg(0));
3484     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3485 
3486     QualType Ty = E->getArg(0)->getType();
3487     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3488                                                  getContext().getTypeSize(Ty));
3489     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3490 
         // Pointers go through ptrtoint; other values are bitcast to an integer of
         // their own width and then zero-extended (or bitcast) to i32.
3491     if (StoreVal->getType()->isPointerTy())
3492       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3493     else {
3494       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3495       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3496     }
3497 
3498     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3499                                        ? Intrinsic::arm_stlex
3500                                        : Intrinsic::arm_strex,
3501                                    StoreAddr->getType());
3502     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3503   }
3504 
       // __builtin_arm_clrex: argument-less call to arm_clrex.
3505   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3506     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3507     return Builder.CreateCall(F);
3508   }
3509 
3510   // CRC32: select the intrinsic.  crc32d and crc32cd map to the word
       // intrinsics and are expanded into two calls further down.
3511   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3512   switch (BuiltinID) {
3513   case ARM::BI__builtin_arm_crc32b:
3514     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3515   case ARM::BI__builtin_arm_crc32cb:
3516     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3517   case ARM::BI__builtin_arm_crc32h:
3518     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3519   case ARM::BI__builtin_arm_crc32ch:
3520     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3521   case ARM::BI__builtin_arm_crc32w:
3522   case ARM::BI__builtin_arm_crc32d:
3523     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3524   case ARM::BI__builtin_arm_crc32cw:
3525   case ARM::BI__builtin_arm_crc32cd:
3526     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3527   }
3528 
3529   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3530     Value *Arg0 = EmitScalarExpr(E->getArg(0));
3531     Value *Arg1 = EmitScalarExpr(E->getArg(1));
3532 
3533     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3534     // intrinsics, hence we need different codegen for these cases.
3535     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3536         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
           // The first call takes the low 32 bits of Arg1; the second call takes
           // the high 32 bits, with the first result as its first operand.
3537       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3538       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3539       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3540       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3541 
3542       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3543       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3544       return Builder.CreateCall(F, {Res, Arg1b});
3545     } else {
           // All other CRC builtins: widen the data operand to i32, one call.
3546       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3547 
3548       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3549       return Builder.CreateCall(F, {Arg0, Arg1});
3550     }
3551   }
3552 
       // Special-register builtins: rsr, rsr64, rsrp are reads; wsr, wsr64, wsrp
       // are writes.  The work is delegated to EmitSpecialRegisterBuiltin.
3553   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
3554       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3555       BuiltinID == ARM::BI__builtin_arm_rsrp ||
3556       BuiltinID == ARM::BI__builtin_arm_wsr ||
3557       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
3558       BuiltinID == ARM::BI__builtin_arm_wsrp) {
3559 
3560     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
3561                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3562                   BuiltinID == ARM::BI__builtin_arm_rsrp;
3563 
3564     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
3565                             BuiltinID == ARM::BI__builtin_arm_wsrp;
3566 
3567     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3568                    BuiltinID == ARM::BI__builtin_arm_wsr64;
3569 
         // Pointer variants pair a void-pointer value with an i32 register type;
         // the 64 variants use i64 for both; everything else uses i32 for both.
3570     llvm::Type *ValueType;
3571     llvm::Type *RegisterType;
3572     if (IsPointerBuiltin) {
3573       ValueType = VoidPtrTy;
3574       RegisterType = Int32Ty;
3575     } else if (Is64Bit) {
3576       ValueType = RegisterType = Int64Ty;
3577     } else {
3578       ValueType = RegisterType = Int32Ty;
3579     }
3580 
3581     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
3582   }
3583 
3584   // Find out if any arguments are required to be integer constant
3585   // expressions.
3586   unsigned ICEArguments = 0;
3587   ASTContext::GetBuiltinTypeError Error;
3588   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3589   assert(Error == ASTContext::GE_None && "Should not codegen an error");
3590 
       // Helper: the alignment quantity of addr as an i32 constant.
3591   auto getAlignmentValue32 = [&](Address addr) -> Value* {
3592     return Builder.getInt32(addr.getAlignment().getQuantity());
3593   };
3594 
       // Build the operand list.  PtrOp0 and PtrOp1 keep the address (with its
       // alignment) of the pointer argument of the load/store builtins listed in
       // the loop.  When the builtin has the extra type argument, that last
       // argument is not emitted as an operand.
3595   Address PtrOp0 = Address::invalid();
3596   Address PtrOp1 = Address::invalid();
3597   SmallVector<Value*, 4> Ops;
3598   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3599   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3600   for (unsigned i = 0, e = NumArgs; i != e; i++) {
         // vld1 and vst1..vst4 builtins: argument 0 is the pointer.
3601     if (i == 0) {
3602       switch (BuiltinID) {
3603       case NEON::BI__builtin_neon_vld1_v:
3604       case NEON::BI__builtin_neon_vld1q_v:
3605       case NEON::BI__builtin_neon_vld1q_lane_v:
3606       case NEON::BI__builtin_neon_vld1_lane_v:
3607       case NEON::BI__builtin_neon_vld1_dup_v:
3608       case NEON::BI__builtin_neon_vld1q_dup_v:
3609       case NEON::BI__builtin_neon_vst1_v:
3610       case NEON::BI__builtin_neon_vst1q_v:
3611       case NEON::BI__builtin_neon_vst1q_lane_v:
3612       case NEON::BI__builtin_neon_vst1_lane_v:
3613       case NEON::BI__builtin_neon_vst2_v:
3614       case NEON::BI__builtin_neon_vst2q_v:
3615       case NEON::BI__builtin_neon_vst2_lane_v:
3616       case NEON::BI__builtin_neon_vst2q_lane_v:
3617       case NEON::BI__builtin_neon_vst3_v:
3618       case NEON::BI__builtin_neon_vst3q_v:
3619       case NEON::BI__builtin_neon_vst3_lane_v:
3620       case NEON::BI__builtin_neon_vst3q_lane_v:
3621       case NEON::BI__builtin_neon_vst4_v:
3622       case NEON::BI__builtin_neon_vst4q_v:
3623       case NEON::BI__builtin_neon_vst4_lane_v:
3624       case NEON::BI__builtin_neon_vst4q_lane_v:
3625         // Get the alignment for the argument in addition to the value;
3626         // we'll use it later.
3627         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3628         Ops.push_back(PtrOp0.getPointer());
3629         continue;
3630       }
3631     }
         // vld2, vld3 and vld4 builtins: argument 1 is the pointer.
3632     if (i == 1) {
3633       switch (BuiltinID) {
3634       case NEON::BI__builtin_neon_vld2_v:
3635       case NEON::BI__builtin_neon_vld2q_v:
3636       case NEON::BI__builtin_neon_vld3_v:
3637       case NEON::BI__builtin_neon_vld3q_v:
3638       case NEON::BI__builtin_neon_vld4_v:
3639       case NEON::BI__builtin_neon_vld4q_v:
3640       case NEON::BI__builtin_neon_vld2_lane_v:
3641       case NEON::BI__builtin_neon_vld2q_lane_v:
3642       case NEON::BI__builtin_neon_vld3_lane_v:
3643       case NEON::BI__builtin_neon_vld3q_lane_v:
3644       case NEON::BI__builtin_neon_vld4_lane_v:
3645       case NEON::BI__builtin_neon_vld4q_lane_v:
3646       case NEON::BI__builtin_neon_vld2_dup_v:
3647       case NEON::BI__builtin_neon_vld3_dup_v:
3648       case NEON::BI__builtin_neon_vld4_dup_v:
3649         // Get the alignment for the argument in addition to the value;
3650         // we'll use it later.
3651         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3652         Ops.push_back(PtrOp1.getPointer());
3653         continue;
3654       }
3655     }
3656 
         // Bit i of ICEArguments marks argument i as one that has to be an
         // integer constant expression.
3657     if ((ICEArguments & (1 << i)) == 0) {
3658       Ops.push_back(EmitScalarExpr(E->getArg(i)));
3659     } else {
3660       // If this is required to be a constant, constant fold it so that we know
3661       // that the generated intrinsic gets a ConstantInt.
3662       llvm::APSInt Result;
3663       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3664       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3665       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3666     }
3667   }
3668 
       // Builtins without the extra type argument (the same set for which
       // HasExtraNeonArgument returns false) are completed in this switch.
3669   switch (BuiltinID) {
3670   default: break;
3671 
3672   case NEON::BI__builtin_neon_vget_lane_i8:
3673   case NEON::BI__builtin_neon_vget_lane_i16:
3674   case NEON::BI__builtin_neon_vget_lane_i32:
3675   case NEON::BI__builtin_neon_vget_lane_i64:
3676   case NEON::BI__builtin_neon_vget_lane_f32:
3677   case NEON::BI__builtin_neon_vgetq_lane_i8:
3678   case NEON::BI__builtin_neon_vgetq_lane_i16:
3679   case NEON::BI__builtin_neon_vgetq_lane_i32:
3680   case NEON::BI__builtin_neon_vgetq_lane_i64:
3681   case NEON::BI__builtin_neon_vgetq_lane_f32:
3682     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3683 
3684   case NEON::BI__builtin_neon_vset_lane_i8:
3685   case NEON::BI__builtin_neon_vset_lane_i16:
3686   case NEON::BI__builtin_neon_vset_lane_i32:
3687   case NEON::BI__builtin_neon_vset_lane_i64:
3688   case NEON::BI__builtin_neon_vset_lane_f32:
3689   case NEON::BI__builtin_neon_vsetq_lane_i8:
3690   case NEON::BI__builtin_neon_vsetq_lane_i16:
3691   case NEON::BI__builtin_neon_vsetq_lane_i32:
3692   case NEON::BI__builtin_neon_vsetq_lane_i64:
3693   case NEON::BI__builtin_neon_vsetq_lane_f32:
         // Note the operand order: Ops[1] is passed first, then Ops[0], then the
         // lane in Ops[2].
3694     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3695 
3696   case NEON::BI__builtin_neon_vsha1h_u32:
3697     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3698                         "vsha1h");
       // NOTE(review): the three cases below reuse the name hint vsha1h although
       // they call sha1c, sha1p and sha1m; it looks copied from the case above.
       // Confirm whether distinct hints were intended.
3699   case NEON::BI__builtin_neon_vsha1cq_u32:
3700     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3701                         "vsha1h");
3702   case NEON::BI__builtin_neon_vsha1pq_u32:
3703     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3704                         "vsha1h");
3705   case NEON::BI__builtin_neon_vsha1mq_u32:
3706     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3707                         "vsha1h");
3708 
3709   // The ARM _MoveToCoprocessor builtins put the input register value as
3710   // the first argument, but the LLVM intrinsic expects it as the third one.
3711   case ARM::BI_MoveToCoprocessor:
3712   case ARM::BI_MoveToCoprocessor2: {
3713     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
3714                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
3715     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3716                                   Ops[3], Ops[4], Ops[5]});
3717   }
3718   }
3719 
3720   // Get the last argument, which specifies the vector type.
       // Everything from here on relies on that argument being present.
3721   assert(HasExtraArg);
3722   llvm::APSInt Result;
3723   const Expr *Arg = E->getArg(E->getNumArgs()-1);
3724   if (!Arg->isIntegerConstantExpr(Result, getContext()))
3725     return nullptr;
3726 
       // vcvtr builtins: overloaded on float or double; a last-argument value of
       // 1 selects the unsigned intrinsic.
3727   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3728       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3729     // Determine the overloaded type of this builtin.
3730     llvm::Type *Ty;
3731     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3732       Ty = FloatTy;
3733     else
3734       Ty = DoubleTy;
3735 
3736     // Determine whether this is an unsigned conversion or not.
3737     bool usgn = Result.getZExtValue() == 1;
3738     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3739 
3740     // Call the appropriate intrinsic.
3741     Function *F = CGM.getIntrinsic(Int, Ty);
3742     return Builder.CreateCall(F, Ops, "vcvtr");
3743   }
3744 
3745   // Determine the type of this overloaded NEON intrinsic.
3746   NeonTypeFlags Type(Result.getZExtValue());
3747   bool usgn = Type.isUnsigned();
3748   bool rightShift = false;
3749 
       // Give up when GetNeonType has no vector type for these flags.
3750   llvm::VectorType *VTy = GetNeonType(this, Type);
3751   llvm::Type *Ty = VTy;
3752   if (!Ty)
3753     return nullptr;
3754 
3755   // Many NEON builtins have identical semantics and uses in ARM and
3756   // AArch64. Emit these in a single function.
3757   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
3758   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
3759       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3760   if (Builtin)
3761     return EmitCommonNeonBuiltinExpr(
3762         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3763         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
3764 
       // Builtins that were not found in the shared map are handled one by one;
       // anything unknown yields nullptr.
3765   unsigned Int;
3766   switch (BuiltinID) {
3767   default: return nullptr;
3768   case NEON::BI__builtin_neon_vld1q_lane_v:
3769     // Handle 64-bit integer elements as a special case.  Use shuffles of
3770     // one-element vectors to avoid poor code for i64 in the backend.
3771     if (VTy->getElementType()->isIntegerTy(64)) {
3772       // Extract the other lane.
           // NOTE(review): 1-Lane presumes Lane is 0 or 1, i.e. a two-lane
           // vector; confirm that Sema restricts the lane index accordingly.
3773       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3774       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3775       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3776       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3777       // Load the value as a one-element vector.
3778       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3779       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
3780       Value *Align = getAlignmentValue32(PtrOp0);
3781       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3782       // Combine them.
3783       uint32_t Indices[] = {1 - Lane, Lane};
3784       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
3785       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3786     }
3787     // fall through
3788   case NEON::BI__builtin_neon_vld1_lane_v: {
         // Load one element through the element-typed pointer and insert it into
         // the vector at the lane given by Ops[2].
3789     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3790     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
3791     Value *Ld = Builder.CreateLoad(PtrOp0);
3792     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3793   }
3794   case NEON::BI__builtin_neon_vld2_dup_v:
3795   case NEON::BI__builtin_neon_vld3_dup_v:
3796   case NEON::BI__builtin_neon_vld4_dup_v: {
3797     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
3798     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
3799       switch (BuiltinID) {
3800       case NEON::BI__builtin_neon_vld2_dup_v:
3801         Int = Intrinsic::arm_neon_vld2;
3802         break;
3803       case NEON::BI__builtin_neon_vld3_dup_v:
3804         Int = Intrinsic::arm_neon_vld3;
3805         break;
3806       case NEON::BI__builtin_neon_vld4_dup_v:
3807         Int = Intrinsic::arm_neon_vld4;
3808         break;
3809       default: llvm_unreachable("unknown vld_dup intrinsic?");
3810       }
           // Plain vldN call on the pointer in Ops[1]; the aggregate result is
           // then stored through Ops[0].
3811       Function *F = CGM.getIntrinsic(Int, Ty);
3812       llvm::Value *Align = getAlignmentValue32(PtrOp1);
3813       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
3814       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3815       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3816       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3817     }
         // Other element sizes: call the matching vldNlane intrinsic with undef
         // input vectors and lane 0, then splat lane 0 of each returned vector.
3818     switch (BuiltinID) {
3819     case NEON::BI__builtin_neon_vld2_dup_v:
3820       Int = Intrinsic::arm_neon_vld2lane;
3821       break;
3822     case NEON::BI__builtin_neon_vld3_dup_v:
3823       Int = Intrinsic::arm_neon_vld3lane;
3824       break;
3825     case NEON::BI__builtin_neon_vld4_dup_v:
3826       Int = Intrinsic::arm_neon_vld4lane;
3827       break;
3828     default: llvm_unreachable("unknown vld_dup intrinsic?");
3829     }
3830     Function *F = CGM.getIntrinsic(Int, Ty);
3831     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
3832 
         // Arguments: pointer, one undef vector per struct element, lane index 0,
         // alignment.
3833     SmallVector<Value*, 6> Args;
3834     Args.push_back(Ops[1]);
3835     Args.append(STy->getNumElements(), UndefValue::get(Ty));
3836 
3837     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
3838     Args.push_back(CI);
3839     Args.push_back(getAlignmentValue32(PtrOp1));
3840 
3841     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
3842     // splat lane 0 to all elts in each vector of the result.
3843     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
3844       Value *Val = Builder.CreateExtractValue(Ops[1], i);
3845       Value *Elt = Builder.CreateBitCast(Val, Ty);
3846       Elt = EmitNeonSplat(Elt, CI);
3847       Elt = Builder.CreateBitCast(Elt, Val->getType());
3848       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
3849     }
         // Store the updated aggregate through the destination pointer in Ops[0].
3850     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3851     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3852     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3853   }
       // Shift builtins below: the signedness taken from the type flags picks
       // between the unsigned and signed intrinsic where both exist.
3854   case NEON::BI__builtin_neon_vqrshrn_n_v:
3855     Int =
3856       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
3857     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
3858                         1, true);
3859   case NEON::BI__builtin_neon_vqrshrun_n_v:
3860     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
3861                         Ops, "vqrshrun_n", 1, true);
3862   case NEON::BI__builtin_neon_vqshrn_n_v:
3863     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
3864     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
3865                         1, true);
3866   case NEON::BI__builtin_neon_vqshrun_n_v:
3867     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
3868                         Ops, "vqshrun_n", 1, true);
3869   case NEON::BI__builtin_neon_vrecpe_v:
3870   case NEON::BI__builtin_neon_vrecpeq_v:
3871     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
3872                         Ops, "vrecpe");
3873   case NEON::BI__builtin_neon_vrshrn_n_v:
3874     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
3875                         Ops, "vrshrn_n", 1, true);
3876   case NEON::BI__builtin_neon_vrsra_n_v:
3877   case NEON::BI__builtin_neon_vrsraq_n_v:
         // Shift Ops[1] by Ops[2] with the vrshift intrinsic, then add the result
         // to Ops[0].
3878     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3879     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3880     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
3881     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
3882     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
3883     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
3884   case NEON::BI__builtin_neon_vsri_n_v:
3885   case NEON::BI__builtin_neon_vsriq_n_v:
3886     rightShift = true;
         // fall through
         // NOTE(review): vsri shares the vshiftins call and the vsli_n name hint
         // with vsli; only the rightShift flag differs.
3887   case NEON::BI__builtin_neon_vsli_n_v:
3888   case NEON::BI__builtin_neon_vsliq_n_v:
3889     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
3890     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
3891                         Ops, "vsli_n");
3892   case NEON::BI__builtin_neon_vsra_n_v:
3893   case NEON::BI__builtin_neon_vsraq_n_v:
         // Right-shift Ops[1] by the immediate in Ops[2] and add it to Ops[0].
3894     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3895     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
3896     return Builder.CreateAdd(Ops[0], Ops[1]);
3897   case NEON::BI__builtin_neon_vst1q_lane_v:
3898     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
3899     // a one-element vector and avoid poor code for i64 in the backend.
3900     if (VTy->getElementType()->isIntegerTy(64)) {
3901       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3902       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
3903       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
           // Ops[2] held the lane index; it is replaced by the alignment operand
           // of the vst1 call.
3904       Ops[2] = getAlignmentValue32(PtrOp0);
3905       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
3906                                                  Ops[1]->getType()), Ops);
3907     }
3908     // fall through
3909   case NEON::BI__builtin_neon_vst1_lane_v: {
         // Extract the requested lane and store it through PtrOp0, cast to a
         // pointer to the element type.
3910     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3911     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
3912     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3913     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
3914     return St;
3915   }
       // Table lookups: each vtblN and vtbxN builtin calls the arm_neon intrinsic
       // of the same name with the operands unchanged.
3916   case NEON::BI__builtin_neon_vtbl1_v:
3917     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
3918                         Ops, "vtbl1");
3919   case NEON::BI__builtin_neon_vtbl2_v:
3920     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
3921                         Ops, "vtbl2");
3922   case NEON::BI__builtin_neon_vtbl3_v:
3923     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
3924                         Ops, "vtbl3");
3925   case NEON::BI__builtin_neon_vtbl4_v:
3926     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
3927                         Ops, "vtbl4");
3928   case NEON::BI__builtin_neon_vtbx1_v:
3929     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
3930                         Ops, "vtbx1");
3931   case NEON::BI__builtin_neon_vtbx2_v:
3932     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
3933                         Ops, "vtbx2");
3934   case NEON::BI__builtin_neon_vtbx3_v:
3935     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
3936                         Ops, "vtbx3");
3937   case NEON::BI__builtin_neon_vtbx4_v:
3938     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
3939                         Ops, "vtbx4");
3940   }
3941 }
3942 
3943 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
3944                                       const CallExpr *E,
3945                                       SmallVectorImpl<Value *> &Ops) {
3946   unsigned int Int = 0;
3947   const char *s = nullptr;
3948 
3949   switch (BuiltinID) {
3950   default:
3951     return nullptr;
3952   case NEON::BI__builtin_neon_vtbl1_v:
3953   case NEON::BI__builtin_neon_vqtbl1_v:
3954   case NEON::BI__builtin_neon_vqtbl1q_v:
3955   case NEON::BI__builtin_neon_vtbl2_v:
3956   case NEON::BI__builtin_neon_vqtbl2_v:
3957   case NEON::BI__builtin_neon_vqtbl2q_v:
3958   case NEON::BI__builtin_neon_vtbl3_v:
3959   case NEON::BI__builtin_neon_vqtbl3_v:
3960   case NEON::BI__builtin_neon_vqtbl3q_v:
3961   case NEON::BI__builtin_neon_vtbl4_v:
3962   case NEON::BI__builtin_neon_vqtbl4_v:
3963   case NEON::BI__builtin_neon_vqtbl4q_v:
3964     break;
3965   case NEON::BI__builtin_neon_vtbx1_v:
3966   case NEON::BI__builtin_neon_vqtbx1_v:
3967   case NEON::BI__builtin_neon_vqtbx1q_v:
3968   case NEON::BI__builtin_neon_vtbx2_v:
3969   case NEON::BI__builtin_neon_vqtbx2_v:
3970   case NEON::BI__builtin_neon_vqtbx2q_v:
3971   case NEON::BI__builtin_neon_vtbx3_v:
3972   case NEON::BI__builtin_neon_vqtbx3_v:
3973   case NEON::BI__builtin_neon_vqtbx3q_v:
3974   case NEON::BI__builtin_neon_vtbx4_v:
3975   case NEON::BI__builtin_neon_vqtbx4_v:
3976   case NEON::BI__builtin_neon_vqtbx4q_v:
3977     break;
3978   }
3979 
3980   assert(E->getNumArgs() >= 3);
3981 
3982   // Get the last argument, which specifies the vector type.
3983   llvm::APSInt Result;
3984   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3985   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
3986     return nullptr;
3987 
3988   // Determine the type of this overloaded NEON intrinsic.
3989   NeonTypeFlags Type(Result.getZExtValue());
3990   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
3991   if (!Ty)
3992     return nullptr;
3993 
3994   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3995 
3996   // AArch64 scalar builtins are not overloaded, they do not have an extra
3997   // argument that specifies the vector type, need to handle each case.
3998   switch (BuiltinID) {
3999   case NEON::BI__builtin_neon_vtbl1_v: {
4000     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4001                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4002                               "vtbl1");
4003   }
4004   case NEON::BI__builtin_neon_vtbl2_v: {
4005     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4006                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4007                               "vtbl1");
4008   }
4009   case NEON::BI__builtin_neon_vtbl3_v: {
4010     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4011                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4012                               "vtbl2");
4013   }
4014   case NEON::BI__builtin_neon_vtbl4_v: {
4015     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4016                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4017                               "vtbl2");
4018   }
4019   case NEON::BI__builtin_neon_vtbx1_v: {
4020     Value *TblRes =
4021         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4022                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4023 
4024     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4025     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4026     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4027 
4028     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4029     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4030     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4031   }
4032   case NEON::BI__builtin_neon_vtbx2_v: {
4033     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4034                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4035                               "vtbx1");
4036   }
4037   case NEON::BI__builtin_neon_vtbx3_v: {
4038     Value *TblRes =
4039         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4040                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4041 
4042     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4043     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4044                                            TwentyFourV);
4045     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4046 
4047     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4048     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4049     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4050   }
4051   case NEON::BI__builtin_neon_vtbx4_v: {
4052     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4053                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4054                               "vtbx2");
4055   }
4056   case NEON::BI__builtin_neon_vqtbl1_v:
4057   case NEON::BI__builtin_neon_vqtbl1q_v:
4058     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4059   case NEON::BI__builtin_neon_vqtbl2_v:
4060   case NEON::BI__builtin_neon_vqtbl2q_v: {
4061     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4062   case NEON::BI__builtin_neon_vqtbl3_v:
4063   case NEON::BI__builtin_neon_vqtbl3q_v:
4064     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4065   case NEON::BI__builtin_neon_vqtbl4_v:
4066   case NEON::BI__builtin_neon_vqtbl4q_v:
4067     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4068   case NEON::BI__builtin_neon_vqtbx1_v:
4069   case NEON::BI__builtin_neon_vqtbx1q_v:
4070     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4071   case NEON::BI__builtin_neon_vqtbx2_v:
4072   case NEON::BI__builtin_neon_vqtbx2q_v:
4073     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4074   case NEON::BI__builtin_neon_vqtbx3_v:
4075   case NEON::BI__builtin_neon_vqtbx3q_v:
4076     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4077   case NEON::BI__builtin_neon_vqtbx4_v:
4078   case NEON::BI__builtin_neon_vqtbx4q_v:
4079     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4080   }
4081   }
4082 
4083   if (!Int)
4084     return nullptr;
4085 
4086   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4087   return CGF.EmitNeonCall(F, Ops, s);
4088 }
4089 
4090 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4091   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4092   Op = Builder.CreateBitCast(Op, Int16Ty);
4093   Value *V = UndefValue::get(VTy);
4094   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4095   Op = Builder.CreateInsertElement(V, Op, CI);
4096   return Op;
4097 }
4098 
4099 Value *CodeGenFunction::vectorWrapScalar8(Value *Op) {
4100   llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
4101   Op = Builder.CreateBitCast(Op, Int8Ty);
4102   Value *V = UndefValue::get(VTy);
4103   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4104   Op = Builder.CreateInsertElement(V, Op, CI);
4105   return Op;
4106 }
4107 
4108 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4109                                                const CallExpr *E) {
4110   unsigned HintID = static_cast<unsigned>(-1);
4111   switch (BuiltinID) {
4112   default: break;
4113   case AArch64::BI__builtin_arm_nop:
4114     HintID = 0;
4115     break;
4116   case AArch64::BI__builtin_arm_yield:
4117     HintID = 1;
4118     break;
4119   case AArch64::BI__builtin_arm_wfe:
4120     HintID = 2;
4121     break;
4122   case AArch64::BI__builtin_arm_wfi:
4123     HintID = 3;
4124     break;
4125   case AArch64::BI__builtin_arm_sev:
4126     HintID = 4;
4127     break;
4128   case AArch64::BI__builtin_arm_sevl:
4129     HintID = 5;
4130     break;
4131   }
4132 
4133   if (HintID != static_cast<unsigned>(-1)) {
4134     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4135     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4136   }
4137 
4138   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4139     Value *Address         = EmitScalarExpr(E->getArg(0));
4140     Value *RW              = EmitScalarExpr(E->getArg(1));
4141     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4142     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4143     Value *IsData          = EmitScalarExpr(E->getArg(4));
4144 
4145     Value *Locality = nullptr;
4146     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4147       // Temporal fetch, needs to convert cache level to locality.
4148       Locality = llvm::ConstantInt::get(Int32Ty,
4149         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4150     } else {
4151       // Streaming fetch.
4152       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4153     }
4154 
4155     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4156     // PLDL3STRM or PLDL2STRM.
4157     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4158     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4159   }
4160 
4161   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4162     assert((getContext().getTypeSize(E->getType()) == 32) &&
4163            "rbit of unusual size!");
4164     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4165     return Builder.CreateCall(
4166         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4167   }
4168   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4169     assert((getContext().getTypeSize(E->getType()) == 64) &&
4170            "rbit of unusual size!");
4171     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4172     return Builder.CreateCall(
4173         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4174   }
4175 
4176   if (BuiltinID == AArch64::BI__clear_cache) {
4177     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4178     const FunctionDecl *FD = E->getDirectCallee();
4179     Value *Ops[2];
4180     for (unsigned i = 0; i < 2; i++)
4181       Ops[i] = EmitScalarExpr(E->getArg(i));
4182     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4183     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4184     StringRef Name = FD->getName();
4185     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4186   }
4187 
4188   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4189       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4190       getContext().getTypeSize(E->getType()) == 128) {
4191     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4192                                        ? Intrinsic::aarch64_ldaxp
4193                                        : Intrinsic::aarch64_ldxp);
4194 
4195     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4196     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4197                                     "ldxp");
4198 
4199     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4200     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4201     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4202     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4203     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4204 
4205     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4206     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4207     Val = Builder.CreateOr(Val, Val1);
4208     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4209   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4210              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4211     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4212 
4213     QualType Ty = E->getType();
4214     llvm::Type *RealResTy = ConvertType(Ty);
4215     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4216                                                   getContext().getTypeSize(Ty));
4217     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4218 
4219     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4220                                        ? Intrinsic::aarch64_ldaxr
4221                                        : Intrinsic::aarch64_ldxr,
4222                                    LoadAddr->getType());
4223     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4224 
4225     if (RealResTy->isPointerTy())
4226       return Builder.CreateIntToPtr(Val, RealResTy);
4227 
4228     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4229     return Builder.CreateBitCast(Val, RealResTy);
4230   }
4231 
4232   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4233        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4234       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4235     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4236                                        ? Intrinsic::aarch64_stlxp
4237                                        : Intrinsic::aarch64_stxp);
4238     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4239 
4240     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4241     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4242 
4243     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4244     llvm::Value *Val = Builder.CreateLoad(Tmp);
4245 
4246     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4247     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4248     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4249                                          Int8PtrTy);
4250     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4251   }
4252 
4253   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4254       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4255     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4256     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4257 
4258     QualType Ty = E->getArg(0)->getType();
4259     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4260                                                  getContext().getTypeSize(Ty));
4261     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4262 
4263     if (StoreVal->getType()->isPointerTy())
4264       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4265     else {
4266       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4267       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4268     }
4269 
4270     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4271                                        ? Intrinsic::aarch64_stlxr
4272                                        : Intrinsic::aarch64_stxr,
4273                                    StoreAddr->getType());
4274     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4275   }
4276 
4277   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4278     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4279     return Builder.CreateCall(F);
4280   }
4281 
4282   if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
4283     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
4284     return Builder.CreateCall(F);
4285   }
4286 
4287   // CRC32
4288   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4289   switch (BuiltinID) {
4290   case AArch64::BI__builtin_arm_crc32b:
4291     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4292   case AArch64::BI__builtin_arm_crc32cb:
4293     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4294   case AArch64::BI__builtin_arm_crc32h:
4295     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4296   case AArch64::BI__builtin_arm_crc32ch:
4297     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4298   case AArch64::BI__builtin_arm_crc32w:
4299     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4300   case AArch64::BI__builtin_arm_crc32cw:
4301     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4302   case AArch64::BI__builtin_arm_crc32d:
4303     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4304   case AArch64::BI__builtin_arm_crc32cd:
4305     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4306   }
4307 
4308   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4309     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4310     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4311     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4312 
4313     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4314     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4315 
4316     return Builder.CreateCall(F, {Arg0, Arg1});
4317   }
4318 
4319   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4320       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4321       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4322       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4323       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4324       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4325 
4326     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4327                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4328                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
4329 
4330     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4331                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
4332 
4333     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
4334                    BuiltinID != AArch64::BI__builtin_arm_wsr;
4335 
4336     llvm::Type *ValueType;
4337     llvm::Type *RegisterType = Int64Ty;
4338     if (IsPointerBuiltin) {
4339       ValueType = VoidPtrTy;
4340     } else if (Is64Bit) {
4341       ValueType = Int64Ty;
4342     } else {
4343       ValueType = Int32Ty;
4344     }
4345 
4346     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4347   }
4348 
4349   // Find out if any arguments are required to be integer constant
4350   // expressions.
4351   unsigned ICEArguments = 0;
4352   ASTContext::GetBuiltinTypeError Error;
4353   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4354   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4355 
4356   llvm::SmallVector<Value*, 4> Ops;
4357   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
4358     if ((ICEArguments & (1 << i)) == 0) {
4359       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4360     } else {
4361       // If this is required to be a constant, constant fold it so that we know
4362       // that the generated intrinsic gets a ConstantInt.
4363       llvm::APSInt Result;
4364       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4365       assert(IsConst && "Constant arg isn't actually constant?");
4366       (void)IsConst;
4367       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4368     }
4369   }
4370 
4371   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
4372   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4373       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
4374 
4375   if (Builtin) {
4376     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4377     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4378     assert(Result && "SISD intrinsic should have been handled");
4379     return Result;
4380   }
4381 
4382   llvm::APSInt Result;
4383   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4384   NeonTypeFlags Type(0);
4385   if (Arg->isIntegerConstantExpr(Result, getContext()))
4386     // Determine the type of this overloaded NEON intrinsic.
4387     Type = NeonTypeFlags(Result.getZExtValue());
4388 
4389   bool usgn = Type.isUnsigned();
4390   bool quad = Type.isQuad();
4391 
4392   // Handle non-overloaded intrinsics first.
4393   switch (BuiltinID) {
4394   default: break;
4395   case NEON::BI__builtin_neon_vldrq_p128: {
4396     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4397     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4398     return Builder.CreateDefaultAlignedLoad(Ptr);
4399   }
4400   case NEON::BI__builtin_neon_vstrq_p128: {
4401     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4402     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4403     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
4404   }
4405   case NEON::BI__builtin_neon_vcvts_u32_f32:
4406   case NEON::BI__builtin_neon_vcvtd_u64_f64:
4407     usgn = true;
4408     // FALL THROUGH
4409   case NEON::BI__builtin_neon_vcvts_s32_f32:
4410   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4411     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4412     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4413     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4414     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4415     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4416     if (usgn)
4417       return Builder.CreateFPToUI(Ops[0], InTy);
4418     return Builder.CreateFPToSI(Ops[0], InTy);
4419   }
4420   case NEON::BI__builtin_neon_vcvts_f32_u32:
4421   case NEON::BI__builtin_neon_vcvtd_f64_u64:
4422     usgn = true;
4423     // FALL THROUGH
4424   case NEON::BI__builtin_neon_vcvts_f32_s32:
4425   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4426     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4427     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4428     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4429     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4430     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4431     if (usgn)
4432       return Builder.CreateUIToFP(Ops[0], FTy);
4433     return Builder.CreateSIToFP(Ops[0], FTy);
4434   }
4435   case NEON::BI__builtin_neon_vpaddd_s64: {
4436     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
4437     Value *Vec = EmitScalarExpr(E->getArg(0));
4438     // The vector is v2f64, so make sure it's bitcast to that.
4439     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4440     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4441     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4442     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4443     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4444     // Pairwise addition of a v2f64 into a scalar f64.
4445     return Builder.CreateAdd(Op0, Op1, "vpaddd");
4446   }
4447   case NEON::BI__builtin_neon_vpaddd_f64: {
4448     llvm::Type *Ty =
4449       llvm::VectorType::get(DoubleTy, 2);
4450     Value *Vec = EmitScalarExpr(E->getArg(0));
4451     // The vector is v2f64, so make sure it's bitcast to that.
4452     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
4453     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4454     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4455     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4456     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4457     // Pairwise addition of a v2f64 into a scalar f64.
4458     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4459   }
4460   case NEON::BI__builtin_neon_vpadds_f32: {
4461     llvm::Type *Ty =
4462       llvm::VectorType::get(FloatTy, 2);
4463     Value *Vec = EmitScalarExpr(E->getArg(0));
4464     // The vector is v2f32, so make sure it's bitcast to that.
4465     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
4466     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4467     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4468     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4469     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4470     // Pairwise addition of a v2f32 into a scalar f32.
4471     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4472   }
4473   case NEON::BI__builtin_neon_vceqzd_s64:
4474   case NEON::BI__builtin_neon_vceqzd_f64:
4475   case NEON::BI__builtin_neon_vceqzs_f32:
4476     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4477     return EmitAArch64CompareBuiltinExpr(
4478         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4479         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
4480   case NEON::BI__builtin_neon_vcgezd_s64:
4481   case NEON::BI__builtin_neon_vcgezd_f64:
4482   case NEON::BI__builtin_neon_vcgezs_f32:
4483     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4484     return EmitAArch64CompareBuiltinExpr(
4485         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4486         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
4487   case NEON::BI__builtin_neon_vclezd_s64:
4488   case NEON::BI__builtin_neon_vclezd_f64:
4489   case NEON::BI__builtin_neon_vclezs_f32:
4490     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4491     return EmitAArch64CompareBuiltinExpr(
4492         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4493         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
4494   case NEON::BI__builtin_neon_vcgtzd_s64:
4495   case NEON::BI__builtin_neon_vcgtzd_f64:
4496   case NEON::BI__builtin_neon_vcgtzs_f32:
4497     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4498     return EmitAArch64CompareBuiltinExpr(
4499         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4500         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
4501   case NEON::BI__builtin_neon_vcltzd_s64:
4502   case NEON::BI__builtin_neon_vcltzd_f64:
4503   case NEON::BI__builtin_neon_vcltzs_f32:
4504     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4505     return EmitAArch64CompareBuiltinExpr(
4506         Ops[0], ConvertType(E->getCallReturnType(getContext())),
4507         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
4508 
4509   case NEON::BI__builtin_neon_vceqzd_u64: {
4510     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4511     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4512     Ops[0] =
4513         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
4514     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
4515   }
4516   case NEON::BI__builtin_neon_vceqd_f64:
4517   case NEON::BI__builtin_neon_vcled_f64:
4518   case NEON::BI__builtin_neon_vcltd_f64:
4519   case NEON::BI__builtin_neon_vcged_f64:
4520   case NEON::BI__builtin_neon_vcgtd_f64: {
4521     llvm::CmpInst::Predicate P;
4522     switch (BuiltinID) {
4523     default: llvm_unreachable("missing builtin ID in switch!");
4524     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
4525     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
4526     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
4527     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
4528     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
4529     }
4530     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4531     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4532     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4533     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4534     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
4535   }
4536   case NEON::BI__builtin_neon_vceqs_f32:
4537   case NEON::BI__builtin_neon_vcles_f32:
4538   case NEON::BI__builtin_neon_vclts_f32:
4539   case NEON::BI__builtin_neon_vcges_f32:
4540   case NEON::BI__builtin_neon_vcgts_f32: {
4541     llvm::CmpInst::Predicate P;
4542     switch (BuiltinID) {
4543     default: llvm_unreachable("missing builtin ID in switch!");
4544     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
4545     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
4546     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
4547     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
4548     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
4549     }
4550     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4551     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
4552     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
4553     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4554     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
4555   }
4556   case NEON::BI__builtin_neon_vceqd_s64:
4557   case NEON::BI__builtin_neon_vceqd_u64:
4558   case NEON::BI__builtin_neon_vcgtd_s64:
4559   case NEON::BI__builtin_neon_vcgtd_u64:
4560   case NEON::BI__builtin_neon_vcltd_s64:
4561   case NEON::BI__builtin_neon_vcltd_u64:
4562   case NEON::BI__builtin_neon_vcged_u64:
4563   case NEON::BI__builtin_neon_vcged_s64:
4564   case NEON::BI__builtin_neon_vcled_u64:
4565   case NEON::BI__builtin_neon_vcled_s64: {
4566     llvm::CmpInst::Predicate P;
4567     switch (BuiltinID) {
4568     default: llvm_unreachable("missing builtin ID in switch!");
4569     case NEON::BI__builtin_neon_vceqd_s64:
4570     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
4571     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
4572     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
4573     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
4574     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
4575     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
4576     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
4577     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
4578     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
4579     }
4580     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4581     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4582     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4583     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
4584     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
4585   }
4586   case NEON::BI__builtin_neon_vtstd_s64:
4587   case NEON::BI__builtin_neon_vtstd_u64: {
4588     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4589     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4590     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4591     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4592     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4593                                 llvm::Constant::getNullValue(Int64Ty));
4594     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
4595   }
4596   case NEON::BI__builtin_neon_vset_lane_i8:
4597   case NEON::BI__builtin_neon_vset_lane_i16:
4598   case NEON::BI__builtin_neon_vset_lane_i32:
4599   case NEON::BI__builtin_neon_vset_lane_i64:
4600   case NEON::BI__builtin_neon_vset_lane_f32:
4601   case NEON::BI__builtin_neon_vsetq_lane_i8:
4602   case NEON::BI__builtin_neon_vsetq_lane_i16:
4603   case NEON::BI__builtin_neon_vsetq_lane_i32:
4604   case NEON::BI__builtin_neon_vsetq_lane_i64:
4605   case NEON::BI__builtin_neon_vsetq_lane_f32:
4606     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4607     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4608   case NEON::BI__builtin_neon_vset_lane_f64:
4609     // The vector type needs a cast for the v1f64 variant.
4610     Ops[1] = Builder.CreateBitCast(Ops[1],
4611                                    llvm::VectorType::get(DoubleTy, 1));
4612     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4613     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4614   case NEON::BI__builtin_neon_vsetq_lane_f64:
4615     // The vector type needs a cast for the v2f64 variant.
4616     Ops[1] = Builder.CreateBitCast(Ops[1],
4617         llvm::VectorType::get(DoubleTy, 2));
4618     Ops.push_back(EmitScalarExpr(E->getArg(2)));
4619     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4620 
4621   case NEON::BI__builtin_neon_vget_lane_i8:
4622   case NEON::BI__builtin_neon_vdupb_lane_i8:
4623     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
4624     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4625                                         "vget_lane");
4626   case NEON::BI__builtin_neon_vgetq_lane_i8:
4627   case NEON::BI__builtin_neon_vdupb_laneq_i8:
4628     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
4629     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4630                                         "vgetq_lane");
4631   case NEON::BI__builtin_neon_vget_lane_i16:
4632   case NEON::BI__builtin_neon_vduph_lane_i16:
4633     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
4634     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4635                                         "vget_lane");
4636   case NEON::BI__builtin_neon_vgetq_lane_i16:
4637   case NEON::BI__builtin_neon_vduph_laneq_i16:
4638     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
4639     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4640                                         "vgetq_lane");
4641   case NEON::BI__builtin_neon_vget_lane_i32:
4642   case NEON::BI__builtin_neon_vdups_lane_i32:
4643     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
4644     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4645                                         "vget_lane");
4646   case NEON::BI__builtin_neon_vdups_lane_f32:
4647     Ops[0] = Builder.CreateBitCast(Ops[0],
4648         llvm::VectorType::get(FloatTy, 2));
4649     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4650                                         "vdups_lane");
4651   case NEON::BI__builtin_neon_vgetq_lane_i32:
4652   case NEON::BI__builtin_neon_vdups_laneq_i32:
4653     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
4654     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4655                                         "vgetq_lane");
4656   case NEON::BI__builtin_neon_vget_lane_i64:
4657   case NEON::BI__builtin_neon_vdupd_lane_i64:
4658     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
4659     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4660                                         "vget_lane");
4661   case NEON::BI__builtin_neon_vdupd_lane_f64:
4662     Ops[0] = Builder.CreateBitCast(Ops[0],
4663         llvm::VectorType::get(DoubleTy, 1));
4664     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4665                                         "vdupd_lane");
4666   case NEON::BI__builtin_neon_vgetq_lane_i64:
4667   case NEON::BI__builtin_neon_vdupd_laneq_i64:
4668     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
4669     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4670                                         "vgetq_lane");
4671   case NEON::BI__builtin_neon_vget_lane_f32:
4672     Ops[0] = Builder.CreateBitCast(Ops[0],
4673         llvm::VectorType::get(FloatTy, 2));
4674     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4675                                         "vget_lane");
4676   case NEON::BI__builtin_neon_vget_lane_f64:
4677     Ops[0] = Builder.CreateBitCast(Ops[0],
4678         llvm::VectorType::get(DoubleTy, 1));
4679     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4680                                         "vget_lane");
4681   case NEON::BI__builtin_neon_vgetq_lane_f32:
4682   case NEON::BI__builtin_neon_vdups_laneq_f32:
4683     Ops[0] = Builder.CreateBitCast(Ops[0],
4684         llvm::VectorType::get(FloatTy, 4));
4685     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4686                                         "vgetq_lane");
4687   case NEON::BI__builtin_neon_vgetq_lane_f64:
4688   case NEON::BI__builtin_neon_vdupd_laneq_f64:
4689     Ops[0] = Builder.CreateBitCast(Ops[0],
4690         llvm::VectorType::get(DoubleTy, 2));
4691     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4692                                         "vgetq_lane");
4693   case NEON::BI__builtin_neon_vaddd_s64:
4694   case NEON::BI__builtin_neon_vaddd_u64:
4695     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
4696   case NEON::BI__builtin_neon_vsubd_s64:
4697   case NEON::BI__builtin_neon_vsubd_u64:
4698     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
4699   case NEON::BI__builtin_neon_vqdmlalh_s16:
4700   case NEON::BI__builtin_neon_vqdmlslh_s16: {
4701     SmallVector<Value *, 2> ProductOps;
4702     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4703     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
4704     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4705     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4706                           ProductOps, "vqdmlXl");
4707     Constant *CI = ConstantInt::get(SizeTy, 0);
4708     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4709 
4710     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
4711                                         ? Intrinsic::aarch64_neon_sqadd
4712                                         : Intrinsic::aarch64_neon_sqsub;
4713     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
4714   }
4715   case NEON::BI__builtin_neon_vqshlud_n_s64: {
4716     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4717     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4718     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
4719                         Ops, "vqshlu_n");
4720   }
4721   case NEON::BI__builtin_neon_vqshld_n_u64:
4722   case NEON::BI__builtin_neon_vqshld_n_s64: {
4723     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
4724                                    ? Intrinsic::aarch64_neon_uqshl
4725                                    : Intrinsic::aarch64_neon_sqshl;
4726     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4727     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4728     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
4729   }
4730   case NEON::BI__builtin_neon_vrshrd_n_u64:
4731   case NEON::BI__builtin_neon_vrshrd_n_s64: {
4732     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
4733                                    ? Intrinsic::aarch64_neon_urshl
4734                                    : Intrinsic::aarch64_neon_srshl;
4735     Ops.push_back(EmitScalarExpr(E->getArg(1)));
4736     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
4737     Ops[1] = ConstantInt::get(Int64Ty, -SV);
4738     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
4739   }
4740   case NEON::BI__builtin_neon_vrsrad_n_u64:
4741   case NEON::BI__builtin_neon_vrsrad_n_s64: {
4742     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
4743                                    ? Intrinsic::aarch64_neon_urshl
4744                                    : Intrinsic::aarch64_neon_srshl;
4745     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4746     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
4747     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
4748                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
4749     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
4750   }
4751   case NEON::BI__builtin_neon_vshld_n_s64:
4752   case NEON::BI__builtin_neon_vshld_n_u64: {
4753     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4754     return Builder.CreateShl(
4755         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
4756   }
4757   case NEON::BI__builtin_neon_vshrd_n_s64: {
4758     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4759     return Builder.CreateAShr(
4760         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4761                                                    Amt->getZExtValue())),
4762         "shrd_n");
4763   }
4764   case NEON::BI__builtin_neon_vshrd_n_u64: {
4765     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4766     uint64_t ShiftAmt = Amt->getZExtValue();
4767     // Right-shifting an unsigned value by its size yields 0.
4768     if (ShiftAmt == 64)
4769       return ConstantInt::get(Int64Ty, 0);
4770     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
4771                               "shrd_n");
4772   }
4773   case NEON::BI__builtin_neon_vsrad_n_s64: {
4774     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4775     Ops[1] = Builder.CreateAShr(
4776         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4777                                                    Amt->getZExtValue())),
4778         "shrd_n");
4779     return Builder.CreateAdd(Ops[0], Ops[1]);
4780   }
4781   case NEON::BI__builtin_neon_vsrad_n_u64: {
4782     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4783     uint64_t ShiftAmt = Amt->getZExtValue();
4784     // Right-shifting an unsigned value by its size yields 0.
4785     // As Op + 0 = Op, return Ops[0] directly.
4786     if (ShiftAmt == 64)
4787       return Ops[0];
4788     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
4789                                 "shrd_n");
4790     return Builder.CreateAdd(Ops[0], Ops[1]);
4791   }
4792   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
4793   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
4794   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
4795   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
4796     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4797                                           "lane");
4798     SmallVector<Value *, 2> ProductOps;
4799     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4800     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
4801     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4802     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4803                           ProductOps, "vqdmlXl");
4804     Constant *CI = ConstantInt::get(SizeTy, 0);
4805     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4806     Ops.pop_back();
4807 
4808     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
4809                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
4810                           ? Intrinsic::aarch64_neon_sqadd
4811                           : Intrinsic::aarch64_neon_sqsub;
4812     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
4813   }
4814   case NEON::BI__builtin_neon_vqdmlals_s32:
4815   case NEON::BI__builtin_neon_vqdmlsls_s32: {
4816     SmallVector<Value *, 2> ProductOps;
4817     ProductOps.push_back(Ops[1]);
4818     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
4819     Ops[1] =
4820         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
4821                      ProductOps, "vqdmlXl");
4822 
4823     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
4824                                         ? Intrinsic::aarch64_neon_sqadd
4825                                         : Intrinsic::aarch64_neon_sqsub;
4826     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
4827   }
4828   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
4829   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
4830   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
4831   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
4832     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4833                                           "lane");
4834     SmallVector<Value *, 2> ProductOps;
4835     ProductOps.push_back(Ops[1]);
4836     ProductOps.push_back(Ops[2]);
4837     Ops[1] =
4838         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
4839                      ProductOps, "vqdmlXl");
4840     Ops.pop_back();
4841 
4842     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
4843                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
4844                           ? Intrinsic::aarch64_neon_sqadd
4845                           : Intrinsic::aarch64_neon_sqsub;
4846     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
4847   }
4848   }
4849 
4850   llvm::VectorType *VTy = GetNeonType(this, Type);
4851   llvm::Type *Ty = VTy;
4852   if (!Ty)
4853     return nullptr;
4854 
4855   // Not all intrinsics handled by the common case work for AArch64 yet, so only
4856   // defer to common code if it's been added to our special map.
4857   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
4858                                    AArch64SIMDIntrinsicsProvenSorted);
4859 
4860   if (Builtin)
4861     return EmitCommonNeonBuiltinExpr(
4862         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4863         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
4864         /*never use addresses*/ Address::invalid(), Address::invalid());
4865 
4866   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
4867     return V;
4868 
4869   unsigned Int;
4870   switch (BuiltinID) {
4871   default: return nullptr;
4872   case NEON::BI__builtin_neon_vbsl_v:
4873   case NEON::BI__builtin_neon_vbslq_v: {
4874     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
4875     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
4876     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
4877     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
4878 
4879     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
4880     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
4881     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
4882     return Builder.CreateBitCast(Ops[0], Ty);
4883   }
4884   case NEON::BI__builtin_neon_vfma_lane_v:
4885   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
4886     // The ARM builtins (and instructions) have the addend as the first
4887     // operand, but the 'fma' intrinsics have it last. Swap it around here.
4888     Value *Addend = Ops[0];
4889     Value *Multiplicand = Ops[1];
4890     Value *LaneSource = Ops[2];
4891     Ops[0] = Multiplicand;
4892     Ops[1] = LaneSource;
4893     Ops[2] = Addend;
4894 
4895     // Now adjust things to handle the lane access.
4896     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
4897       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
4898       VTy;
4899     llvm::Constant *cst = cast<Constant>(Ops[3]);
4900     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
4901     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
4902     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
4903 
4904     Ops.pop_back();
4905     Int = Intrinsic::fma;
4906     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
4907   }
4908   case NEON::BI__builtin_neon_vfma_laneq_v: {
4909     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4910     // v1f64 fma should be mapped to Neon scalar f64 fma
4911     if (VTy && VTy->getElementType() == DoubleTy) {
4912       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4913       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4914       llvm::Type *VTy = GetNeonType(this,
4915         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
4916       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
4917       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
4918       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
4919       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4920       return Builder.CreateBitCast(Result, Ty);
4921     }
4922     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4923     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4925 
4926     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
4927                                             VTy->getNumElements() * 2);
4928     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
4929     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
4930                                                cast<ConstantInt>(Ops[3]));
4931     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
4932 
4933     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
4934   }
4935   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
4936     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4937     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4938     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4939 
4940     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4941     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
4942     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
4943   }
4944   case NEON::BI__builtin_neon_vfmas_lane_f32:
4945   case NEON::BI__builtin_neon_vfmas_laneq_f32:
4946   case NEON::BI__builtin_neon_vfmad_lane_f64:
4947   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
4948     Ops.push_back(EmitScalarExpr(E->getArg(3)));
4949     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
4950     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
4951     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
4952     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
4953   }
4954   case NEON::BI__builtin_neon_vfms_v:
4955   case NEON::BI__builtin_neon_vfmsq_v: {  // Only used for FP types
4956     // FIXME: probably remove when we no longer support aarch64_simd.h
4957     // (arm_neon.h delegates to vfma).
4958 
4959     // The ARM builtins (and instructions) have the addend as the first
4960     // operand, but the 'fma' intrinsics have it last. Swap it around here.
4961     Value *Subtrahend = Ops[0];
4962     Value *Multiplicand = Ops[2];
4963     Ops[0] = Multiplicand;
4964     Ops[2] = Subtrahend;
4965     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
4966     Ops[1] = Builder.CreateFNeg(Ops[1]);
4967     Int = Intrinsic::fma;
4968     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
4969   }
4970   case NEON::BI__builtin_neon_vmull_v:
4971     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4972     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
4973     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
4974     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4975   case NEON::BI__builtin_neon_vmax_v:
4976   case NEON::BI__builtin_neon_vmaxq_v:
4977     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4978     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
4979     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
4980     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
4981   case NEON::BI__builtin_neon_vmin_v:
4982   case NEON::BI__builtin_neon_vminq_v:
4983     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4984     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
4985     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
4986     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
4987   case NEON::BI__builtin_neon_vabd_v:
4988   case NEON::BI__builtin_neon_vabdq_v:
4989     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
4990     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
4991     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
4992     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
4993   case NEON::BI__builtin_neon_vpadal_v:
4994   case NEON::BI__builtin_neon_vpadalq_v: {
4995     unsigned ArgElts = VTy->getNumElements();
4996     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
4997     unsigned BitWidth = EltTy->getBitWidth();
4998     llvm::Type *ArgTy = llvm::VectorType::get(
4999         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5000     llvm::Type* Tys[2] = { VTy, ArgTy };
5001     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5002     SmallVector<llvm::Value*, 1> TmpOps;
5003     TmpOps.push_back(Ops[1]);
5004     Function *F = CGM.getIntrinsic(Int, Tys);
5005     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5006     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5007     return Builder.CreateAdd(tmp, addend);
5008   }
5009   case NEON::BI__builtin_neon_vpmin_v:
5010   case NEON::BI__builtin_neon_vpminq_v:
5011     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5012     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5013     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5014     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5015   case NEON::BI__builtin_neon_vpmax_v:
5016   case NEON::BI__builtin_neon_vpmaxq_v:
5017     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5018     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5019     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5020     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5021   case NEON::BI__builtin_neon_vminnm_v:
5022   case NEON::BI__builtin_neon_vminnmq_v:
5023     Int = Intrinsic::aarch64_neon_fminnm;
5024     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5025   case NEON::BI__builtin_neon_vmaxnm_v:
5026   case NEON::BI__builtin_neon_vmaxnmq_v:
5027     Int = Intrinsic::aarch64_neon_fmaxnm;
5028     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5029   case NEON::BI__builtin_neon_vrecpss_f32: {
5030     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5031     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5032                         Ops, "vrecps");
5033   }
5034   case NEON::BI__builtin_neon_vrecpsd_f64: {
5035     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5036     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5037                         Ops, "vrecps");
5038   }
5039   case NEON::BI__builtin_neon_vqshrun_n_v:
5040     Int = Intrinsic::aarch64_neon_sqshrun;
5041     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5042   case NEON::BI__builtin_neon_vqrshrun_n_v:
5043     Int = Intrinsic::aarch64_neon_sqrshrun;
5044     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5045   case NEON::BI__builtin_neon_vqshrn_n_v:
5046     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5047     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5048   case NEON::BI__builtin_neon_vrshrn_n_v:
5049     Int = Intrinsic::aarch64_neon_rshrn;
5050     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5051   case NEON::BI__builtin_neon_vqrshrn_n_v:
5052     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5053     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5054   case NEON::BI__builtin_neon_vrnda_v:
5055   case NEON::BI__builtin_neon_vrndaq_v: {
5056     Int = Intrinsic::round;
5057     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5058   }
5059   case NEON::BI__builtin_neon_vrndi_v:
5060   case NEON::BI__builtin_neon_vrndiq_v: {
5061     Int = Intrinsic::nearbyint;
5062     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5063   }
5064   case NEON::BI__builtin_neon_vrndm_v:
5065   case NEON::BI__builtin_neon_vrndmq_v: {
5066     Int = Intrinsic::floor;
5067     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5068   }
5069   case NEON::BI__builtin_neon_vrndn_v:
5070   case NEON::BI__builtin_neon_vrndnq_v: {
5071     Int = Intrinsic::aarch64_neon_frintn;
5072     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5073   }
5074   case NEON::BI__builtin_neon_vrndp_v:
5075   case NEON::BI__builtin_neon_vrndpq_v: {
5076     Int = Intrinsic::ceil;
5077     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5078   }
5079   case NEON::BI__builtin_neon_vrndx_v:
5080   case NEON::BI__builtin_neon_vrndxq_v: {
5081     Int = Intrinsic::rint;
5082     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5083   }
5084   case NEON::BI__builtin_neon_vrnd_v:
5085   case NEON::BI__builtin_neon_vrndq_v: {
5086     Int = Intrinsic::trunc;
5087     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5088   }
5089   case NEON::BI__builtin_neon_vceqz_v:
5090   case NEON::BI__builtin_neon_vceqzq_v:
5091     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5092                                          ICmpInst::ICMP_EQ, "vceqz");
5093   case NEON::BI__builtin_neon_vcgez_v:
5094   case NEON::BI__builtin_neon_vcgezq_v:
5095     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5096                                          ICmpInst::ICMP_SGE, "vcgez");
5097   case NEON::BI__builtin_neon_vclez_v:
5098   case NEON::BI__builtin_neon_vclezq_v:
5099     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5100                                          ICmpInst::ICMP_SLE, "vclez");
5101   case NEON::BI__builtin_neon_vcgtz_v:
5102   case NEON::BI__builtin_neon_vcgtzq_v:
5103     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5104                                          ICmpInst::ICMP_SGT, "vcgtz");
5105   case NEON::BI__builtin_neon_vcltz_v:
5106   case NEON::BI__builtin_neon_vcltzq_v:
5107     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5108                                          ICmpInst::ICMP_SLT, "vcltz");
5109   case NEON::BI__builtin_neon_vcvt_f64_v:
5110   case NEON::BI__builtin_neon_vcvtq_f64_v:
5111     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5112     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5113     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5114                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5115   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5116     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5117            "unexpected vcvt_f64_f32 builtin");
5118     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5119     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5120 
5121     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5122   }
5123   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5124     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5125            "unexpected vcvt_f32_f64 builtin");
5126     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5127     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5128 
5129     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5130   }
5131   case NEON::BI__builtin_neon_vcvt_s32_v:
5132   case NEON::BI__builtin_neon_vcvt_u32_v:
5133   case NEON::BI__builtin_neon_vcvt_s64_v:
5134   case NEON::BI__builtin_neon_vcvt_u64_v:
5135   case NEON::BI__builtin_neon_vcvtq_s32_v:
5136   case NEON::BI__builtin_neon_vcvtq_u32_v:
5137   case NEON::BI__builtin_neon_vcvtq_s64_v:
5138   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5139     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5140     if (usgn)
5141       return Builder.CreateFPToUI(Ops[0], Ty);
5142     return Builder.CreateFPToSI(Ops[0], Ty);
5143   }
5144   case NEON::BI__builtin_neon_vcvta_s32_v:
5145   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5146   case NEON::BI__builtin_neon_vcvta_u32_v:
5147   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5148   case NEON::BI__builtin_neon_vcvta_s64_v:
5149   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5150   case NEON::BI__builtin_neon_vcvta_u64_v:
5151   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5152     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5153     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5154     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5155   }
5156   case NEON::BI__builtin_neon_vcvtm_s32_v:
5157   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5158   case NEON::BI__builtin_neon_vcvtm_u32_v:
5159   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5160   case NEON::BI__builtin_neon_vcvtm_s64_v:
5161   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5162   case NEON::BI__builtin_neon_vcvtm_u64_v:
5163   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5164     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5165     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5166     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5167   }
5168   case NEON::BI__builtin_neon_vcvtn_s32_v:
5169   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5170   case NEON::BI__builtin_neon_vcvtn_u32_v:
5171   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5172   case NEON::BI__builtin_neon_vcvtn_s64_v:
5173   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5174   case NEON::BI__builtin_neon_vcvtn_u64_v:
5175   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5176     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5177     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5178     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5179   }
5180   case NEON::BI__builtin_neon_vcvtp_s32_v:
5181   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5182   case NEON::BI__builtin_neon_vcvtp_u32_v:
5183   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5184   case NEON::BI__builtin_neon_vcvtp_s64_v:
5185   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5186   case NEON::BI__builtin_neon_vcvtp_u64_v:
5187   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5188     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5189     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5190     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5191   }
5192   case NEON::BI__builtin_neon_vmulx_v:
5193   case NEON::BI__builtin_neon_vmulxq_v: {
5194     Int = Intrinsic::aarch64_neon_fmulx;
5195     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5196   }
5197   case NEON::BI__builtin_neon_vmul_lane_v:
5198   case NEON::BI__builtin_neon_vmul_laneq_v: {
5199     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5200     bool Quad = false;
5201     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5202       Quad = true;
5203     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5204     llvm::Type *VTy = GetNeonType(this,
5205       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5206     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5207     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5208     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5209     return Builder.CreateBitCast(Result, Ty);
5210   }
5211   case NEON::BI__builtin_neon_vnegd_s64:
5212     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5213   case NEON::BI__builtin_neon_vpmaxnm_v:
5214   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5215     Int = Intrinsic::aarch64_neon_fmaxnmp;
5216     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5217   }
5218   case NEON::BI__builtin_neon_vpminnm_v:
5219   case NEON::BI__builtin_neon_vpminnmq_v: {
5220     Int = Intrinsic::aarch64_neon_fminnmp;
5221     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5222   }
5223   case NEON::BI__builtin_neon_vsqrt_v:
5224   case NEON::BI__builtin_neon_vsqrtq_v: {
5225     Int = Intrinsic::sqrt;
5226     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5227     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5228   }
5229   case NEON::BI__builtin_neon_vrbit_v:
5230   case NEON::BI__builtin_neon_vrbitq_v: {
5231     Int = Intrinsic::aarch64_neon_rbit;
5232     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5233   }
5234   case NEON::BI__builtin_neon_vaddv_u8:
5235     // FIXME: These are handled by the AArch64 scalar code.
5236     usgn = true;
5237     // FALLTHROUGH
5238   case NEON::BI__builtin_neon_vaddv_s8: {
5239     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5240     Ty = Int32Ty;
5241     VTy = llvm::VectorType::get(Int8Ty, 8);
5242     llvm::Type *Tys[2] = { Ty, VTy };
5243     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5244     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5245     return Builder.CreateTrunc(Ops[0], Int8Ty);
5246   }
5247   case NEON::BI__builtin_neon_vaddv_u16:
5248     usgn = true;
5249     // FALLTHROUGH
5250   case NEON::BI__builtin_neon_vaddv_s16: {
5251     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5252     Ty = Int32Ty;
5253     VTy = llvm::VectorType::get(Int16Ty, 4);
5254     llvm::Type *Tys[2] = { Ty, VTy };
5255     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5256     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5257     return Builder.CreateTrunc(Ops[0], Int16Ty);
5258   }
5259   case NEON::BI__builtin_neon_vaddvq_u8:
5260     usgn = true;
5261     // FALLTHROUGH
5262   case NEON::BI__builtin_neon_vaddvq_s8: {
5263     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5264     Ty = Int32Ty;
5265     VTy = llvm::VectorType::get(Int8Ty, 16);
5266     llvm::Type *Tys[2] = { Ty, VTy };
5267     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5268     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5269     return Builder.CreateTrunc(Ops[0], Int8Ty);
5270   }
5271   case NEON::BI__builtin_neon_vaddvq_u16:
5272     usgn = true;
5273     // FALLTHROUGH
5274   case NEON::BI__builtin_neon_vaddvq_s16: {
5275     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5276     Ty = Int32Ty;
5277     VTy = llvm::VectorType::get(Int16Ty, 8);
5278     llvm::Type *Tys[2] = { Ty, VTy };
5279     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5280     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5281     return Builder.CreateTrunc(Ops[0], Int16Ty);
5282   }
5283   case NEON::BI__builtin_neon_vmaxv_u8: {
5284     Int = Intrinsic::aarch64_neon_umaxv;
5285     Ty = Int32Ty;
5286     VTy = llvm::VectorType::get(Int8Ty, 8);
5287     llvm::Type *Tys[2] = { Ty, VTy };
5288     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5289     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5290     return Builder.CreateTrunc(Ops[0], Int8Ty);
5291   }
5292   case NEON::BI__builtin_neon_vmaxv_u16: {
5293     Int = Intrinsic::aarch64_neon_umaxv;
5294     Ty = Int32Ty;
5295     VTy = llvm::VectorType::get(Int16Ty, 4);
5296     llvm::Type *Tys[2] = { Ty, VTy };
5297     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5298     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5299     return Builder.CreateTrunc(Ops[0], Int16Ty);
5300   }
5301   case NEON::BI__builtin_neon_vmaxvq_u8: {
5302     Int = Intrinsic::aarch64_neon_umaxv;
5303     Ty = Int32Ty;
5304     VTy = llvm::VectorType::get(Int8Ty, 16);
5305     llvm::Type *Tys[2] = { Ty, VTy };
5306     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5307     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5308     return Builder.CreateTrunc(Ops[0], Int8Ty);
5309   }
5310   case NEON::BI__builtin_neon_vmaxvq_u16: {
5311     Int = Intrinsic::aarch64_neon_umaxv;
5312     Ty = Int32Ty;
5313     VTy = llvm::VectorType::get(Int16Ty, 8);
5314     llvm::Type *Tys[2] = { Ty, VTy };
5315     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5316     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5317     return Builder.CreateTrunc(Ops[0], Int16Ty);
5318   }
5319   case NEON::BI__builtin_neon_vmaxv_s8: {
5320     Int = Intrinsic::aarch64_neon_smaxv;
5321     Ty = Int32Ty;
5322     VTy = llvm::VectorType::get(Int8Ty, 8);
5323     llvm::Type *Tys[2] = { Ty, VTy };
5324     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5325     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5326     return Builder.CreateTrunc(Ops[0], Int8Ty);
5327   }
5328   case NEON::BI__builtin_neon_vmaxv_s16: {
5329     Int = Intrinsic::aarch64_neon_smaxv;
5330     Ty = Int32Ty;
5331     VTy = llvm::VectorType::get(Int16Ty, 4);
5332     llvm::Type *Tys[2] = { Ty, VTy };
5333     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5334     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5335     return Builder.CreateTrunc(Ops[0], Int16Ty);
5336   }
5337   case NEON::BI__builtin_neon_vmaxvq_s8: {
5338     Int = Intrinsic::aarch64_neon_smaxv;
5339     Ty = Int32Ty;
5340     VTy = llvm::VectorType::get(Int8Ty, 16);
5341     llvm::Type *Tys[2] = { Ty, VTy };
5342     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5343     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5344     return Builder.CreateTrunc(Ops[0], Int8Ty);
5345   }
5346   case NEON::BI__builtin_neon_vmaxvq_s16: {
5347     Int = Intrinsic::aarch64_neon_smaxv;
5348     Ty = Int32Ty;
5349     VTy = llvm::VectorType::get(Int16Ty, 8);
5350     llvm::Type *Tys[2] = { Ty, VTy };
5351     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5352     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5353     return Builder.CreateTrunc(Ops[0], Int16Ty);
5354   }
5355   case NEON::BI__builtin_neon_vminv_u8: {
5356     Int = Intrinsic::aarch64_neon_uminv;
5357     Ty = Int32Ty;
5358     VTy = llvm::VectorType::get(Int8Ty, 8);
5359     llvm::Type *Tys[2] = { Ty, VTy };
5360     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5361     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5362     return Builder.CreateTrunc(Ops[0], Int8Ty);
5363   }
5364   case NEON::BI__builtin_neon_vminv_u16: {
5365     Int = Intrinsic::aarch64_neon_uminv;
5366     Ty = Int32Ty;
5367     VTy = llvm::VectorType::get(Int16Ty, 4);
5368     llvm::Type *Tys[2] = { Ty, VTy };
5369     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5370     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5371     return Builder.CreateTrunc(Ops[0], Int16Ty);
5372   }
5373   case NEON::BI__builtin_neon_vminvq_u8: {
5374     Int = Intrinsic::aarch64_neon_uminv;
5375     Ty = Int32Ty;
5376     VTy = llvm::VectorType::get(Int8Ty, 16);
5377     llvm::Type *Tys[2] = { Ty, VTy };
5378     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5379     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5380     return Builder.CreateTrunc(Ops[0], Int8Ty);
5381   }
5382   case NEON::BI__builtin_neon_vminvq_u16: {
5383     Int = Intrinsic::aarch64_neon_uminv;
5384     Ty = Int32Ty;
5385     VTy = llvm::VectorType::get(Int16Ty, 8);
5386     llvm::Type *Tys[2] = { Ty, VTy };
5387     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5388     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5389     return Builder.CreateTrunc(Ops[0], Int16Ty);
5390   }
5391   case NEON::BI__builtin_neon_vminv_s8: {
5392     Int = Intrinsic::aarch64_neon_sminv;
5393     Ty = Int32Ty;
5394     VTy = llvm::VectorType::get(Int8Ty, 8);
5395     llvm::Type *Tys[2] = { Ty, VTy };
5396     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5397     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5398     return Builder.CreateTrunc(Ops[0], Int8Ty);
5399   }
5400   case NEON::BI__builtin_neon_vminv_s16: {
5401     Int = Intrinsic::aarch64_neon_sminv;
5402     Ty = Int32Ty;
5403     VTy = llvm::VectorType::get(Int16Ty, 4);
5404     llvm::Type *Tys[2] = { Ty, VTy };
5405     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5406     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5407     return Builder.CreateTrunc(Ops[0], Int16Ty);
5408   }
5409   case NEON::BI__builtin_neon_vminvq_s8: {
5410     Int = Intrinsic::aarch64_neon_sminv;
5411     Ty = Int32Ty;
5412     VTy = llvm::VectorType::get(Int8Ty, 16);
5413     llvm::Type *Tys[2] = { Ty, VTy };
5414     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5415     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5416     return Builder.CreateTrunc(Ops[0], Int8Ty);
5417   }
5418   case NEON::BI__builtin_neon_vminvq_s16: {
5419     Int = Intrinsic::aarch64_neon_sminv;
5420     Ty = Int32Ty;
5421     VTy = llvm::VectorType::get(Int16Ty, 8);
5422     llvm::Type *Tys[2] = { Ty, VTy };
5423     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5424     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5425     return Builder.CreateTrunc(Ops[0], Int16Ty);
5426   }
5427   case NEON::BI__builtin_neon_vmul_n_f64: {
5428     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5429     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
5430     return Builder.CreateFMul(Ops[0], RHS);
5431   }
5432   case NEON::BI__builtin_neon_vaddlv_u8: {
5433     Int = Intrinsic::aarch64_neon_uaddlv;
5434     Ty = Int32Ty;
5435     VTy = llvm::VectorType::get(Int8Ty, 8);
5436     llvm::Type *Tys[2] = { Ty, VTy };
5437     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5438     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5439     return Builder.CreateTrunc(Ops[0], Int16Ty);
5440   }
5441   case NEON::BI__builtin_neon_vaddlv_u16: {
5442     Int = Intrinsic::aarch64_neon_uaddlv;
5443     Ty = Int32Ty;
5444     VTy = llvm::VectorType::get(Int16Ty, 4);
5445     llvm::Type *Tys[2] = { Ty, VTy };
5446     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5447     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5448   }
5449   case NEON::BI__builtin_neon_vaddlvq_u8: {
5450     Int = Intrinsic::aarch64_neon_uaddlv;
5451     Ty = Int32Ty;
5452     VTy = llvm::VectorType::get(Int8Ty, 16);
5453     llvm::Type *Tys[2] = { Ty, VTy };
5454     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5455     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5456     return Builder.CreateTrunc(Ops[0], Int16Ty);
5457   }
5458   case NEON::BI__builtin_neon_vaddlvq_u16: {
5459     Int = Intrinsic::aarch64_neon_uaddlv;
5460     Ty = Int32Ty;
5461     VTy = llvm::VectorType::get(Int16Ty, 8);
5462     llvm::Type *Tys[2] = { Ty, VTy };
5463     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5464     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5465   }
5466   case NEON::BI__builtin_neon_vaddlv_s8: {
5467     Int = Intrinsic::aarch64_neon_saddlv;
5468     Ty = Int32Ty;
5469     VTy = llvm::VectorType::get(Int8Ty, 8);
5470     llvm::Type *Tys[2] = { Ty, VTy };
5471     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5472     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5473     return Builder.CreateTrunc(Ops[0], Int16Ty);
5474   }
5475   case NEON::BI__builtin_neon_vaddlv_s16: {
5476     Int = Intrinsic::aarch64_neon_saddlv;
5477     Ty = Int32Ty;
5478     VTy = llvm::VectorType::get(Int16Ty, 4);
5479     llvm::Type *Tys[2] = { Ty, VTy };
5480     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5481     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5482   }
5483   case NEON::BI__builtin_neon_vaddlvq_s8: {
5484     Int = Intrinsic::aarch64_neon_saddlv;
5485     Ty = Int32Ty;
5486     VTy = llvm::VectorType::get(Int8Ty, 16);
5487     llvm::Type *Tys[2] = { Ty, VTy };
5488     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5489     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5490     return Builder.CreateTrunc(Ops[0], Int16Ty);
5491   }
5492   case NEON::BI__builtin_neon_vaddlvq_s16: {
5493     Int = Intrinsic::aarch64_neon_saddlv;
5494     Ty = Int32Ty;
5495     VTy = llvm::VectorType::get(Int16Ty, 8);
5496     llvm::Type *Tys[2] = { Ty, VTy };
5497     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5498     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5499   }
5500   case NEON::BI__builtin_neon_vsri_n_v:
5501   case NEON::BI__builtin_neon_vsriq_n_v: {
5502     Int = Intrinsic::aarch64_neon_vsri;
5503     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5504     return EmitNeonCall(Intrin, Ops, "vsri_n");
5505   }
5506   case NEON::BI__builtin_neon_vsli_n_v:
5507   case NEON::BI__builtin_neon_vsliq_n_v: {
5508     Int = Intrinsic::aarch64_neon_vsli;
5509     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5510     return EmitNeonCall(Intrin, Ops, "vsli_n");
5511   }
5512   case NEON::BI__builtin_neon_vsra_n_v:
5513   case NEON::BI__builtin_neon_vsraq_n_v:
5514     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5515     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5516     return Builder.CreateAdd(Ops[0], Ops[1]);
5517   case NEON::BI__builtin_neon_vrsra_n_v:
5518   case NEON::BI__builtin_neon_vrsraq_n_v: {
5519     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
5520     SmallVector<llvm::Value*,2> TmpOps;
5521     TmpOps.push_back(Ops[1]);
5522     TmpOps.push_back(Ops[2]);
5523     Function* F = CGM.getIntrinsic(Int, Ty);
5524     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
5525     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
5526     return Builder.CreateAdd(Ops[0], tmp);
5527   }
5528     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
5529     // of an Align parameter here.
5530   case NEON::BI__builtin_neon_vld1_x2_v:
5531   case NEON::BI__builtin_neon_vld1q_x2_v:
5532   case NEON::BI__builtin_neon_vld1_x3_v:
5533   case NEON::BI__builtin_neon_vld1q_x3_v:
5534   case NEON::BI__builtin_neon_vld1_x4_v:
5535   case NEON::BI__builtin_neon_vld1q_x4_v: {
5536     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5537     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5538     llvm::Type *Tys[2] = { VTy, PTy };
5539     unsigned Int;
5540     switch (BuiltinID) {
5541     case NEON::BI__builtin_neon_vld1_x2_v:
5542     case NEON::BI__builtin_neon_vld1q_x2_v:
5543       Int = Intrinsic::aarch64_neon_ld1x2;
5544       break;
5545     case NEON::BI__builtin_neon_vld1_x3_v:
5546     case NEON::BI__builtin_neon_vld1q_x3_v:
5547       Int = Intrinsic::aarch64_neon_ld1x3;
5548       break;
5549     case NEON::BI__builtin_neon_vld1_x4_v:
5550     case NEON::BI__builtin_neon_vld1q_x4_v:
5551       Int = Intrinsic::aarch64_neon_ld1x4;
5552       break;
5553     }
5554     Function *F = CGM.getIntrinsic(Int, Tys);
5555     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5556     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5557     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5558     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5559   }
5560   case NEON::BI__builtin_neon_vst1_x2_v:
5561   case NEON::BI__builtin_neon_vst1q_x2_v:
5562   case NEON::BI__builtin_neon_vst1_x3_v:
5563   case NEON::BI__builtin_neon_vst1q_x3_v:
5564   case NEON::BI__builtin_neon_vst1_x4_v:
5565   case NEON::BI__builtin_neon_vst1q_x4_v: {
5566     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5567     llvm::Type *Tys[2] = { VTy, PTy };
5568     unsigned Int;
5569     switch (BuiltinID) {
5570     case NEON::BI__builtin_neon_vst1_x2_v:
5571     case NEON::BI__builtin_neon_vst1q_x2_v:
5572       Int = Intrinsic::aarch64_neon_st1x2;
5573       break;
5574     case NEON::BI__builtin_neon_vst1_x3_v:
5575     case NEON::BI__builtin_neon_vst1q_x3_v:
5576       Int = Intrinsic::aarch64_neon_st1x3;
5577       break;
5578     case NEON::BI__builtin_neon_vst1_x4_v:
5579     case NEON::BI__builtin_neon_vst1q_x4_v:
5580       Int = Intrinsic::aarch64_neon_st1x4;
5581       break;
5582     }
5583     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5584     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5585   }
5586   case NEON::BI__builtin_neon_vld1_v:
5587   case NEON::BI__builtin_neon_vld1q_v:
5588     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5589     return Builder.CreateDefaultAlignedLoad(Ops[0]);
5590   case NEON::BI__builtin_neon_vst1_v:
5591   case NEON::BI__builtin_neon_vst1q_v:
5592     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5593     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5594     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5595   case NEON::BI__builtin_neon_vld1_lane_v:
5596   case NEON::BI__builtin_neon_vld1q_lane_v:
5597     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5598     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5599     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5600     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5601     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
5602   case NEON::BI__builtin_neon_vld1_dup_v:
5603   case NEON::BI__builtin_neon_vld1q_dup_v: {
5604     Value *V = UndefValue::get(Ty);
5605     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5606     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5607     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5608     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5609     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
5610     return EmitNeonSplat(Ops[0], CI);
5611   }
5612   case NEON::BI__builtin_neon_vst1_lane_v:
5613   case NEON::BI__builtin_neon_vst1q_lane_v:
5614     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5615     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5616     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5617     return Builder.CreateDefaultAlignedStore(Ops[1],
5618                                              Builder.CreateBitCast(Ops[0], Ty));
5619   case NEON::BI__builtin_neon_vld2_v:
5620   case NEON::BI__builtin_neon_vld2q_v: {
5621     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5622     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5623     llvm::Type *Tys[2] = { VTy, PTy };
5624     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
5625     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5626     Ops[0] = Builder.CreateBitCast(Ops[0],
5627                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5628     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5629   }
5630   case NEON::BI__builtin_neon_vld3_v:
5631   case NEON::BI__builtin_neon_vld3q_v: {
5632     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5633     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5634     llvm::Type *Tys[2] = { VTy, PTy };
5635     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
5636     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5637     Ops[0] = Builder.CreateBitCast(Ops[0],
5638                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5639     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5640   }
5641   case NEON::BI__builtin_neon_vld4_v:
5642   case NEON::BI__builtin_neon_vld4q_v: {
5643     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5644     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5645     llvm::Type *Tys[2] = { VTy, PTy };
5646     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
5647     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5648     Ops[0] = Builder.CreateBitCast(Ops[0],
5649                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5650     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5651   }
5652   case NEON::BI__builtin_neon_vld2_dup_v:
5653   case NEON::BI__builtin_neon_vld2q_dup_v: {
5654     llvm::Type *PTy =
5655       llvm::PointerType::getUnqual(VTy->getElementType());
5656     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5657     llvm::Type *Tys[2] = { VTy, PTy };
5658     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
5659     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5660     Ops[0] = Builder.CreateBitCast(Ops[0],
5661                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5662     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5663   }
5664   case NEON::BI__builtin_neon_vld3_dup_v:
5665   case NEON::BI__builtin_neon_vld3q_dup_v: {
5666     llvm::Type *PTy =
5667       llvm::PointerType::getUnqual(VTy->getElementType());
5668     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5669     llvm::Type *Tys[2] = { VTy, PTy };
5670     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
5671     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5672     Ops[0] = Builder.CreateBitCast(Ops[0],
5673                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5674     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5675   }
5676   case NEON::BI__builtin_neon_vld4_dup_v:
5677   case NEON::BI__builtin_neon_vld4q_dup_v: {
5678     llvm::Type *PTy =
5679       llvm::PointerType::getUnqual(VTy->getElementType());
5680     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5681     llvm::Type *Tys[2] = { VTy, PTy };
5682     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
5683     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5684     Ops[0] = Builder.CreateBitCast(Ops[0],
5685                 llvm::PointerType::getUnqual(Ops[1]->getType()));
5686     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5687   }
5688   case NEON::BI__builtin_neon_vld2_lane_v:
5689   case NEON::BI__builtin_neon_vld2q_lane_v: {
5690     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5691     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
5692     Ops.push_back(Ops[1]);
5693     Ops.erase(Ops.begin()+1);
5694     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5695     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5696     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5697     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
5698     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5699     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5700     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5701   }
5702   case NEON::BI__builtin_neon_vld3_lane_v:
5703   case NEON::BI__builtin_neon_vld3q_lane_v: {
5704     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5705     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
5706     Ops.push_back(Ops[1]);
5707     Ops.erase(Ops.begin()+1);
5708     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5709     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5710     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5711     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5712     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
5713     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5714     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5715     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5716   }
5717   case NEON::BI__builtin_neon_vld4_lane_v:
5718   case NEON::BI__builtin_neon_vld4q_lane_v: {
5719     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5720     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
5721     Ops.push_back(Ops[1]);
5722     Ops.erase(Ops.begin()+1);
5723     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5724     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5725     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5726     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
5727     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
5728     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
5729     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5730     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5731     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5732   }
5733   case NEON::BI__builtin_neon_vst2_v:
5734   case NEON::BI__builtin_neon_vst2q_v: {
5735     Ops.push_back(Ops[0]);
5736     Ops.erase(Ops.begin());
5737     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
5738     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
5739                         Ops, "");
5740   }
5741   case NEON::BI__builtin_neon_vst2_lane_v:
5742   case NEON::BI__builtin_neon_vst2q_lane_v: {
5743     Ops.push_back(Ops[0]);
5744     Ops.erase(Ops.begin());
5745     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
5746     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5747     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
5748                         Ops, "");
5749   }
5750   case NEON::BI__builtin_neon_vst3_v:
5751   case NEON::BI__builtin_neon_vst3q_v: {
5752     Ops.push_back(Ops[0]);
5753     Ops.erase(Ops.begin());
5754     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5755     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
5756                         Ops, "");
5757   }
5758   case NEON::BI__builtin_neon_vst3_lane_v:
5759   case NEON::BI__builtin_neon_vst3q_lane_v: {
5760     Ops.push_back(Ops[0]);
5761     Ops.erase(Ops.begin());
5762     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5763     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5764     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
5765                         Ops, "");
5766   }
5767   case NEON::BI__builtin_neon_vst4_v:
5768   case NEON::BI__builtin_neon_vst4q_v: {
5769     Ops.push_back(Ops[0]);
5770     Ops.erase(Ops.begin());
5771     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5772     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
5773                         Ops, "");
5774   }
5775   case NEON::BI__builtin_neon_vst4_lane_v:
5776   case NEON::BI__builtin_neon_vst4q_lane_v: {
5777     Ops.push_back(Ops[0]);
5778     Ops.erase(Ops.begin());
5779     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5780     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
5781     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
5782                         Ops, "");
5783   }
5784   case NEON::BI__builtin_neon_vtrn_v:
5785   case NEON::BI__builtin_neon_vtrnq_v: {
5786     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5787     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5788     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5789     Value *SV = nullptr;
5790 
5791     for (unsigned vi = 0; vi != 2; ++vi) {
5792       SmallVector<Constant*, 16> Indices;
5793       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5794         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
5795         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
5796       }
5797       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5798       SV = llvm::ConstantVector::get(Indices);
5799       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
5800       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5801     }
5802     return SV;
5803   }
5804   case NEON::BI__builtin_neon_vuzp_v:
5805   case NEON::BI__builtin_neon_vuzpq_v: {
5806     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5807     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5808     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5809     Value *SV = nullptr;
5810 
5811     for (unsigned vi = 0; vi != 2; ++vi) {
5812       SmallVector<Constant*, 16> Indices;
5813       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5814         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
5815 
5816       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5817       SV = llvm::ConstantVector::get(Indices);
5818       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
5819       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5820     }
5821     return SV;
5822   }
5823   case NEON::BI__builtin_neon_vzip_v:
5824   case NEON::BI__builtin_neon_vzipq_v: {
5825     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5826     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5827     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5828     Value *SV = nullptr;
5829 
5830     for (unsigned vi = 0; vi != 2; ++vi) {
5831       SmallVector<Constant*, 16> Indices;
5832       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5833         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
5834         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
5835       }
5836       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5837       SV = llvm::ConstantVector::get(Indices);
5838       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
5839       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5840     }
5841     return SV;
5842   }
5843   case NEON::BI__builtin_neon_vqtbl1q_v: {
5844     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
5845                         Ops, "vtbl1");
5846   }
5847   case NEON::BI__builtin_neon_vqtbl2q_v: {
5848     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
5849                         Ops, "vtbl2");
5850   }
5851   case NEON::BI__builtin_neon_vqtbl3q_v: {
5852     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
5853                         Ops, "vtbl3");
5854   }
5855   case NEON::BI__builtin_neon_vqtbl4q_v: {
5856     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
5857                         Ops, "vtbl4");
5858   }
5859   case NEON::BI__builtin_neon_vqtbx1q_v: {
5860     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
5861                         Ops, "vtbx1");
5862   }
5863   case NEON::BI__builtin_neon_vqtbx2q_v: {
5864     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
5865                         Ops, "vtbx2");
5866   }
5867   case NEON::BI__builtin_neon_vqtbx3q_v: {
5868     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
5869                         Ops, "vtbx3");
5870   }
5871   case NEON::BI__builtin_neon_vqtbx4q_v: {
5872     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
5873                         Ops, "vtbx4");
5874   }
5875   case NEON::BI__builtin_neon_vsqadd_v:
5876   case NEON::BI__builtin_neon_vsqaddq_v: {
5877     Int = Intrinsic::aarch64_neon_usqadd;
5878     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
5879   }
5880   case NEON::BI__builtin_neon_vuqadd_v:
5881   case NEON::BI__builtin_neon_vuqaddq_v: {
5882     Int = Intrinsic::aarch64_neon_suqadd;
5883     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
5884   }
5885   }
5886 }
5887 
5888 llvm::Value *CodeGenFunction::
5889 BuildVector(ArrayRef<llvm::Value*> Ops) {
5890   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
5891          "Not a power-of-two sized vector!");
5892   bool AllConstants = true;
5893   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
5894     AllConstants &= isa<Constant>(Ops[i]);
5895 
5896   // If this is a constant vector, create a ConstantVector.
5897   if (AllConstants) {
5898     SmallVector<llvm::Constant*, 16> CstOps;
5899     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
5900       CstOps.push_back(cast<Constant>(Ops[i]));
5901     return llvm::ConstantVector::get(CstOps);
5902   }
5903 
5904   // Otherwise, insertelement the values to build the vector.
5905   Value *Result =
5906     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
5907 
5908   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
5909     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
5910 
5911   return Result;
5912 }
5913 
5914 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
5915                                            const CallExpr *E) {
5916   if (BuiltinID == X86::BI__builtin_ms_va_start ||
5917       BuiltinID == X86::BI__builtin_ms_va_end)
5918     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
5919                           BuiltinID == X86::BI__builtin_ms_va_start);
5920   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
5921     // Lower this manually. We can't reliably determine whether or not any
5922     // given va_copy() is for a Win64 va_list from the calling convention
5923     // alone, because it's legal to do this from a System V ABI function.
5924     // With opaque pointer types, we won't have enough information in LLVM
5925     // IR to determine this from the argument types, either. Best to do it
5926     // now, while we have enough information.
5927     Address DestAddr = EmitMSVAListRef(E->getArg(0));
5928     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5929 
5930     llvm::Type *BPP = Int8PtrPtrTy;
5931 
5932     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
5933                        DestAddr.getAlignment());
5934     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
5935                       SrcAddr.getAlignment());
5936 
5937     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5938     return Builder.CreateStore(ArgPtr, DestAddr);
5939   }
5940 
5941   SmallVector<Value*, 4> Ops;
5942 
5943   // Find out if any arguments are required to be integer constant expressions.
5944   unsigned ICEArguments = 0;
5945   ASTContext::GetBuiltinTypeError Error;
5946   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5947   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5948 
5949   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
5950     // If this is a normal argument, just emit it as a scalar.
5951     if ((ICEArguments & (1 << i)) == 0) {
5952       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5953       continue;
5954     }
5955 
5956     // If this is required to be a constant, constant fold it so that we know
5957     // that the generated intrinsic gets a ConstantInt.
5958     llvm::APSInt Result;
5959     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5960     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
5961     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5962   }
5963 
5964   switch (BuiltinID) {
5965   default: return nullptr;
5966   case X86::BI__builtin_cpu_supports: {
5967     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
5968     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
5969 
5970     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
5971     // based mapping.
5972     // Processor features and mapping to processor feature value.
5973     enum X86Features {
5974       CMOV = 0,
5975       MMX,
5976       POPCNT,
5977       SSE,
5978       SSE2,
5979       SSE3,
5980       SSSE3,
5981       SSE4_1,
5982       SSE4_2,
5983       AVX,
5984       AVX2,
5985       SSE4_A,
5986       FMA4,
5987       XOP,
5988       FMA,
5989       AVX512F,
5990       BMI,
5991       BMI2,
5992       MAX
5993     };
5994 
5995     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
5996                               .Case("cmov", X86Features::CMOV)
5997                               .Case("mmx", X86Features::MMX)
5998                               .Case("popcnt", X86Features::POPCNT)
5999                               .Case("sse", X86Features::SSE)
6000                               .Case("sse2", X86Features::SSE2)
6001                               .Case("sse3", X86Features::SSE3)
6002                               .Case("sse4.1", X86Features::SSE4_1)
6003                               .Case("sse4.2", X86Features::SSE4_2)
6004                               .Case("avx", X86Features::AVX)
6005                               .Case("avx2", X86Features::AVX2)
6006                               .Case("sse4a", X86Features::SSE4_A)
6007                               .Case("fma4", X86Features::FMA4)
6008                               .Case("xop", X86Features::XOP)
6009                               .Case("fma", X86Features::FMA)
6010                               .Case("avx512f", X86Features::AVX512F)
6011                               .Case("bmi", X86Features::BMI)
6012                               .Case("bmi2", X86Features::BMI2)
6013                               .Default(X86Features::MAX);
6014     assert(Feature != X86Features::MAX && "Invalid feature!");
6015 
6016     // Matching the struct layout from the compiler-rt/libgcc structure that is
6017     // filled in:
6018     // unsigned int __cpu_vendor;
6019     // unsigned int __cpu_type;
6020     // unsigned int __cpu_subtype;
6021     // unsigned int __cpu_features[1];
6022     llvm::Type *STy = llvm::StructType::get(
6023         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6024 
6025     // Grab the global __cpu_model.
6026     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6027 
6028     // Grab the first (0th) element from the field __cpu_features off of the
6029     // global in the struct STy.
6030     Value *Idxs[] = {
6031       ConstantInt::get(Int32Ty, 0),
6032       ConstantInt::get(Int32Ty, 3),
6033       ConstantInt::get(Int32Ty, 0)
6034     };
6035     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6036     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6037                                                 CharUnits::fromQuantity(4));
6038 
6039     // Check the value of the bit corresponding to the feature requested.
6040     Value *Bitset = Builder.CreateAnd(
6041         Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
6042     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6043   }
6044   case X86::BI_mm_prefetch: {
6045     Value *Address = Ops[0];
6046     Value *RW = ConstantInt::get(Int32Ty, 0);
6047     Value *Locality = Ops[1];
6048     Value *Data = ConstantInt::get(Int32Ty, 1);
6049     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6050     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6051   }
6052   case X86::BI__builtin_ia32_undef128:
6053   case X86::BI__builtin_ia32_undef256:
6054   case X86::BI__builtin_ia32_undef512:
6055     return UndefValue::get(ConvertType(E->getType()));
6056   case X86::BI__builtin_ia32_vec_init_v8qi:
6057   case X86::BI__builtin_ia32_vec_init_v4hi:
6058   case X86::BI__builtin_ia32_vec_init_v2si:
6059     return Builder.CreateBitCast(BuildVector(Ops),
6060                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6061   case X86::BI__builtin_ia32_vec_ext_v2si:
6062     return Builder.CreateExtractElement(Ops[0],
6063                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6064   case X86::BI__builtin_ia32_ldmxcsr: {
6065     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6066     Builder.CreateStore(Ops[0], Tmp);
6067     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6068                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6069   }
6070   case X86::BI__builtin_ia32_stmxcsr: {
6071     Address Tmp = CreateMemTemp(E->getType());
6072     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6073                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6074     return Builder.CreateLoad(Tmp, "stmxcsr");
6075   }
6076   case X86::BI__builtin_ia32_storehps:
6077   case X86::BI__builtin_ia32_storelps: {
6078     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
6079     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
6080 
6081     // cast val v2i64
6082     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
6083 
6084     // extract (0, 1)
6085     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
6086     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
6087     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
6088 
6089     // cast pointer to i64 & store
6090     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
6091     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6092   }
6093   case X86::BI__builtin_ia32_palignr128:
6094   case X86::BI__builtin_ia32_palignr256: {
6095     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
6096 
6097     unsigned NumElts =
6098       cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
6099     assert(NumElts % 16 == 0);
6100     unsigned NumLanes = NumElts / 16;
6101     unsigned NumLaneElts = NumElts / NumLanes;
6102 
6103     // If palignr is shifting the pair of vectors more than the size of two
6104     // lanes, emit zero.
6105     if (ShiftVal >= (2 * NumLaneElts))
6106       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6107 
6108     // If palignr is shifting the pair of input vectors more than one lane,
6109     // but less than two lanes, convert to shifting in zeroes.
6110     if (ShiftVal > NumLaneElts) {
6111       ShiftVal -= NumLaneElts;
6112       Ops[1] = Ops[0];
6113       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
6114     }
6115 
6116     uint32_t Indices[32];
6117     // 256-bit palignr operates on 128-bit lanes so we need to handle that
6118     for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
6119       for (unsigned i = 0; i != NumLaneElts; ++i) {
6120         unsigned Idx = ShiftVal + i;
6121         if (Idx >= NumLaneElts)
6122           Idx += NumElts - NumLaneElts; // End of lane, switch operand.
6123         Indices[l + i] = Idx + l;
6124       }
6125     }
6126 
6127     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
6128                                               makeArrayRef(Indices, NumElts));
6129     return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
6130   }
6131   case X86::BI__builtin_ia32_pslldqi256: {
6132     // Shift value is in bits so divide by 8.
6133     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6134 
6135     // If pslldq is shifting the vector more than 15 bytes, emit zero.
6136     if (shiftVal >= 16)
6137       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6138 
6139     uint32_t Indices[32];
6140     // 256-bit pslldq operates on 128-bit lanes so we need to handle that
6141     for (unsigned l = 0; l != 32; l += 16) {
6142       for (unsigned i = 0; i != 16; ++i) {
6143         unsigned Idx = 32 + i - shiftVal;
6144         if (Idx < 32) Idx -= 16; // end of lane, switch operand.
6145         Indices[l + i] = Idx + l;
6146       }
6147     }
6148 
6149     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6150     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6151     Value *Zero = llvm::Constant::getNullValue(VecTy);
6152 
6153     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6154     SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
6155     llvm::Type *ResultType = ConvertType(E->getType());
6156     return Builder.CreateBitCast(SV, ResultType, "cast");
6157   }
6158   case X86::BI__builtin_ia32_psrldqi256: {
6159     // Shift value is in bits so divide by 8.
6160     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6161 
6162     // If psrldq is shifting the vector more than 15 bytes, emit zero.
6163     if (shiftVal >= 16)
6164       return llvm::Constant::getNullValue(ConvertType(E->getType()));
6165 
6166     uint32_t Indices[32];
6167     // 256-bit psrldq operates on 128-bit lanes so we need to handle that
6168     for (unsigned l = 0; l != 32; l += 16) {
6169       for (unsigned i = 0; i != 16; ++i) {
6170         unsigned Idx = i + shiftVal;
6171         if (Idx >= 16) Idx += 16; // end of lane, switch operand.
6172         Indices[l + i] = Idx + l;
6173       }
6174     }
6175 
6176     llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6177     Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6178     Value *Zero = llvm::Constant::getNullValue(VecTy);
6179 
6180     Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6181     SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
6182     llvm::Type *ResultType = ConvertType(E->getType());
6183     return Builder.CreateBitCast(SV, ResultType, "cast");
6184   }
6185   case X86::BI__builtin_ia32_movntps:
6186   case X86::BI__builtin_ia32_movntps256:
6187   case X86::BI__builtin_ia32_movntpd:
6188   case X86::BI__builtin_ia32_movntpd256:
6189   case X86::BI__builtin_ia32_movntdq:
6190   case X86::BI__builtin_ia32_movntdq256:
6191   case X86::BI__builtin_ia32_movnti:
6192   case X86::BI__builtin_ia32_movnti64: {
6193     llvm::MDNode *Node = llvm::MDNode::get(
6194         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
6195 
6196     // Convert the type of the pointer to a pointer to the stored type.
6197     Value *BC = Builder.CreateBitCast(Ops[0],
6198                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
6199                                       "cast");
6200     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
6201     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
6202 
6203     // If the operand is an integer, we can't assume alignment. Otherwise,
6204     // assume natural alignment.
6205     QualType ArgTy = E->getArg(1)->getType();
6206     unsigned Align;
6207     if (ArgTy->isIntegerType())
6208       Align = 1;
6209     else
6210       Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
6211     SI->setAlignment(Align);
6212     return SI;
6213   }
6214   // 3DNow!
6215   case X86::BI__builtin_ia32_pswapdsf:
6216   case X86::BI__builtin_ia32_pswapdsi: {
6217     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
6218     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
6219     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
6220     return Builder.CreateCall(F, Ops, "pswapd");
6221   }
6222   case X86::BI__builtin_ia32_rdrand16_step:
6223   case X86::BI__builtin_ia32_rdrand32_step:
6224   case X86::BI__builtin_ia32_rdrand64_step:
6225   case X86::BI__builtin_ia32_rdseed16_step:
6226   case X86::BI__builtin_ia32_rdseed32_step:
6227   case X86::BI__builtin_ia32_rdseed64_step: {
6228     Intrinsic::ID ID;
6229     switch (BuiltinID) {
6230     default: llvm_unreachable("Unsupported intrinsic!");
6231     case X86::BI__builtin_ia32_rdrand16_step:
6232       ID = Intrinsic::x86_rdrand_16;
6233       break;
6234     case X86::BI__builtin_ia32_rdrand32_step:
6235       ID = Intrinsic::x86_rdrand_32;
6236       break;
6237     case X86::BI__builtin_ia32_rdrand64_step:
6238       ID = Intrinsic::x86_rdrand_64;
6239       break;
6240     case X86::BI__builtin_ia32_rdseed16_step:
6241       ID = Intrinsic::x86_rdseed_16;
6242       break;
6243     case X86::BI__builtin_ia32_rdseed32_step:
6244       ID = Intrinsic::x86_rdseed_32;
6245       break;
6246     case X86::BI__builtin_ia32_rdseed64_step:
6247       ID = Intrinsic::x86_rdseed_64;
6248       break;
6249     }
6250 
6251     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
6252     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
6253                                       Ops[0]);
6254     return Builder.CreateExtractValue(Call, 1);
6255   }
  // SSE comparison intrinsics
6257   case X86::BI__builtin_ia32_cmpeqps:
6258   case X86::BI__builtin_ia32_cmpltps:
6259   case X86::BI__builtin_ia32_cmpleps:
6260   case X86::BI__builtin_ia32_cmpunordps:
6261   case X86::BI__builtin_ia32_cmpneqps:
6262   case X86::BI__builtin_ia32_cmpnltps:
6263   case X86::BI__builtin_ia32_cmpnleps:
6264   case X86::BI__builtin_ia32_cmpordps:
6265   case X86::BI__builtin_ia32_cmpeqss:
6266   case X86::BI__builtin_ia32_cmpltss:
6267   case X86::BI__builtin_ia32_cmpless:
6268   case X86::BI__builtin_ia32_cmpunordss:
6269   case X86::BI__builtin_ia32_cmpneqss:
6270   case X86::BI__builtin_ia32_cmpnltss:
6271   case X86::BI__builtin_ia32_cmpnless:
6272   case X86::BI__builtin_ia32_cmpordss:
6273   case X86::BI__builtin_ia32_cmpeqpd:
6274   case X86::BI__builtin_ia32_cmpltpd:
6275   case X86::BI__builtin_ia32_cmplepd:
6276   case X86::BI__builtin_ia32_cmpunordpd:
6277   case X86::BI__builtin_ia32_cmpneqpd:
6278   case X86::BI__builtin_ia32_cmpnltpd:
6279   case X86::BI__builtin_ia32_cmpnlepd:
6280   case X86::BI__builtin_ia32_cmpordpd:
6281   case X86::BI__builtin_ia32_cmpeqsd:
6282   case X86::BI__builtin_ia32_cmpltsd:
6283   case X86::BI__builtin_ia32_cmplesd:
6284   case X86::BI__builtin_ia32_cmpunordsd:
6285   case X86::BI__builtin_ia32_cmpneqsd:
6286   case X86::BI__builtin_ia32_cmpnltsd:
6287   case X86::BI__builtin_ia32_cmpnlesd:
6288   case X86::BI__builtin_ia32_cmpordsd:
6289     // These exist so that the builtin that takes an immediate can be bounds
6290     // checked by clang to avoid passing bad immediates to the backend. Since
6291     // AVX has a larger immediate than SSE we would need separate builtins to
6292     // do the different bounds checking. Rather than create a clang specific
6293     // SSE only builtin, this implements eight separate builtins to match gcc
6294     // implementation.
6295 
6296     // Choose the immediate.
6297     unsigned Imm;
6298     switch (BuiltinID) {
6299     default: llvm_unreachable("Unsupported intrinsic!");
6300     case X86::BI__builtin_ia32_cmpeqps:
6301     case X86::BI__builtin_ia32_cmpeqss:
6302     case X86::BI__builtin_ia32_cmpeqpd:
6303     case X86::BI__builtin_ia32_cmpeqsd:
6304       Imm = 0;
6305       break;
6306     case X86::BI__builtin_ia32_cmpltps:
6307     case X86::BI__builtin_ia32_cmpltss:
6308     case X86::BI__builtin_ia32_cmpltpd:
6309     case X86::BI__builtin_ia32_cmpltsd:
6310       Imm = 1;
6311       break;
6312     case X86::BI__builtin_ia32_cmpleps:
6313     case X86::BI__builtin_ia32_cmpless:
6314     case X86::BI__builtin_ia32_cmplepd:
6315     case X86::BI__builtin_ia32_cmplesd:
6316       Imm = 2;
6317       break;
6318     case X86::BI__builtin_ia32_cmpunordps:
6319     case X86::BI__builtin_ia32_cmpunordss:
6320     case X86::BI__builtin_ia32_cmpunordpd:
6321     case X86::BI__builtin_ia32_cmpunordsd:
6322       Imm = 3;
6323       break;
6324     case X86::BI__builtin_ia32_cmpneqps:
6325     case X86::BI__builtin_ia32_cmpneqss:
6326     case X86::BI__builtin_ia32_cmpneqpd:
6327     case X86::BI__builtin_ia32_cmpneqsd:
6328       Imm = 4;
6329       break;
6330     case X86::BI__builtin_ia32_cmpnltps:
6331     case X86::BI__builtin_ia32_cmpnltss:
6332     case X86::BI__builtin_ia32_cmpnltpd:
6333     case X86::BI__builtin_ia32_cmpnltsd:
6334       Imm = 5;
6335       break;
6336     case X86::BI__builtin_ia32_cmpnleps:
6337     case X86::BI__builtin_ia32_cmpnless:
6338     case X86::BI__builtin_ia32_cmpnlepd:
6339     case X86::BI__builtin_ia32_cmpnlesd:
6340       Imm = 6;
6341       break;
6342     case X86::BI__builtin_ia32_cmpordps:
6343     case X86::BI__builtin_ia32_cmpordss:
6344     case X86::BI__builtin_ia32_cmpordpd:
6345     case X86::BI__builtin_ia32_cmpordsd:
6346       Imm = 7;
6347       break;
6348     }
6349 
6350     // Choose the intrinsic ID.
6351     const char *name;
6352     Intrinsic::ID ID;
6353     switch (BuiltinID) {
6354     default: llvm_unreachable("Unsupported intrinsic!");
6355     case X86::BI__builtin_ia32_cmpeqps:
6356     case X86::BI__builtin_ia32_cmpltps:
6357     case X86::BI__builtin_ia32_cmpleps:
6358     case X86::BI__builtin_ia32_cmpunordps:
6359     case X86::BI__builtin_ia32_cmpneqps:
6360     case X86::BI__builtin_ia32_cmpnltps:
6361     case X86::BI__builtin_ia32_cmpnleps:
6362     case X86::BI__builtin_ia32_cmpordps:
6363       name = "cmpps";
6364       ID = Intrinsic::x86_sse_cmp_ps;
6365       break;
6366     case X86::BI__builtin_ia32_cmpeqss:
6367     case X86::BI__builtin_ia32_cmpltss:
6368     case X86::BI__builtin_ia32_cmpless:
6369     case X86::BI__builtin_ia32_cmpunordss:
6370     case X86::BI__builtin_ia32_cmpneqss:
6371     case X86::BI__builtin_ia32_cmpnltss:
6372     case X86::BI__builtin_ia32_cmpnless:
6373     case X86::BI__builtin_ia32_cmpordss:
6374       name = "cmpss";
6375       ID = Intrinsic::x86_sse_cmp_ss;
6376       break;
6377     case X86::BI__builtin_ia32_cmpeqpd:
6378     case X86::BI__builtin_ia32_cmpltpd:
6379     case X86::BI__builtin_ia32_cmplepd:
6380     case X86::BI__builtin_ia32_cmpunordpd:
6381     case X86::BI__builtin_ia32_cmpneqpd:
6382     case X86::BI__builtin_ia32_cmpnltpd:
6383     case X86::BI__builtin_ia32_cmpnlepd:
6384     case X86::BI__builtin_ia32_cmpordpd:
6385       name = "cmppd";
6386       ID = Intrinsic::x86_sse2_cmp_pd;
6387       break;
6388     case X86::BI__builtin_ia32_cmpeqsd:
6389     case X86::BI__builtin_ia32_cmpltsd:
6390     case X86::BI__builtin_ia32_cmplesd:
6391     case X86::BI__builtin_ia32_cmpunordsd:
6392     case X86::BI__builtin_ia32_cmpneqsd:
6393     case X86::BI__builtin_ia32_cmpnltsd:
6394     case X86::BI__builtin_ia32_cmpnlesd:
6395     case X86::BI__builtin_ia32_cmpordsd:
6396       name = "cmpsd";
6397       ID = Intrinsic::x86_sse2_cmp_sd;
6398       break;
6399     }
6400 
6401     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6402     llvm::Function *F = CGM.getIntrinsic(ID);
6403     return Builder.CreateCall(F, Ops, name);
6404   }
6405 }
6406 
6407 
6408 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
6409                                            const CallExpr *E) {
6410   SmallVector<Value*, 4> Ops;
6411 
6412   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
6413     Ops.push_back(EmitScalarExpr(E->getArg(i)));
6414 
6415   Intrinsic::ID ID = Intrinsic::not_intrinsic;
6416 
6417   switch (BuiltinID) {
6418   default: return nullptr;
6419 
6420   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
6421   // call __builtin_readcyclecounter.
6422   case PPC::BI__builtin_ppc_get_timebase:
6423     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
6424 
6425   // vec_ld, vec_lvsl, vec_lvsr
6426   case PPC::BI__builtin_altivec_lvx:
6427   case PPC::BI__builtin_altivec_lvxl:
6428   case PPC::BI__builtin_altivec_lvebx:
6429   case PPC::BI__builtin_altivec_lvehx:
6430   case PPC::BI__builtin_altivec_lvewx:
6431   case PPC::BI__builtin_altivec_lvsl:
6432   case PPC::BI__builtin_altivec_lvsr:
6433   case PPC::BI__builtin_vsx_lxvd2x:
6434   case PPC::BI__builtin_vsx_lxvw4x:
6435   {
6436     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
6437 
6438     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
6439     Ops.pop_back();
6440 
6441     switch (BuiltinID) {
6442     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
6443     case PPC::BI__builtin_altivec_lvx:
6444       ID = Intrinsic::ppc_altivec_lvx;
6445       break;
6446     case PPC::BI__builtin_altivec_lvxl:
6447       ID = Intrinsic::ppc_altivec_lvxl;
6448       break;
6449     case PPC::BI__builtin_altivec_lvebx:
6450       ID = Intrinsic::ppc_altivec_lvebx;
6451       break;
6452     case PPC::BI__builtin_altivec_lvehx:
6453       ID = Intrinsic::ppc_altivec_lvehx;
6454       break;
6455     case PPC::BI__builtin_altivec_lvewx:
6456       ID = Intrinsic::ppc_altivec_lvewx;
6457       break;
6458     case PPC::BI__builtin_altivec_lvsl:
6459       ID = Intrinsic::ppc_altivec_lvsl;
6460       break;
6461     case PPC::BI__builtin_altivec_lvsr:
6462       ID = Intrinsic::ppc_altivec_lvsr;
6463       break;
6464     case PPC::BI__builtin_vsx_lxvd2x:
6465       ID = Intrinsic::ppc_vsx_lxvd2x;
6466       break;
6467     case PPC::BI__builtin_vsx_lxvw4x:
6468       ID = Intrinsic::ppc_vsx_lxvw4x;
6469       break;
6470     }
6471     llvm::Function *F = CGM.getIntrinsic(ID);
6472     return Builder.CreateCall(F, Ops, "");
6473   }
6474 
6475   // vec_st
6476   case PPC::BI__builtin_altivec_stvx:
6477   case PPC::BI__builtin_altivec_stvxl:
6478   case PPC::BI__builtin_altivec_stvebx:
6479   case PPC::BI__builtin_altivec_stvehx:
6480   case PPC::BI__builtin_altivec_stvewx:
6481   case PPC::BI__builtin_vsx_stxvd2x:
6482   case PPC::BI__builtin_vsx_stxvw4x:
6483   {
6484     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
6485     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
6486     Ops.pop_back();
6487 
6488     switch (BuiltinID) {
6489     default: llvm_unreachable("Unsupported st intrinsic!");
6490     case PPC::BI__builtin_altivec_stvx:
6491       ID = Intrinsic::ppc_altivec_stvx;
6492       break;
6493     case PPC::BI__builtin_altivec_stvxl:
6494       ID = Intrinsic::ppc_altivec_stvxl;
6495       break;
6496     case PPC::BI__builtin_altivec_stvebx:
6497       ID = Intrinsic::ppc_altivec_stvebx;
6498       break;
6499     case PPC::BI__builtin_altivec_stvehx:
6500       ID = Intrinsic::ppc_altivec_stvehx;
6501       break;
6502     case PPC::BI__builtin_altivec_stvewx:
6503       ID = Intrinsic::ppc_altivec_stvewx;
6504       break;
6505     case PPC::BI__builtin_vsx_stxvd2x:
6506       ID = Intrinsic::ppc_vsx_stxvd2x;
6507       break;
6508     case PPC::BI__builtin_vsx_stxvw4x:
6509       ID = Intrinsic::ppc_vsx_stxvw4x;
6510       break;
6511     }
6512     llvm::Function *F = CGM.getIntrinsic(ID);
6513     return Builder.CreateCall(F, Ops, "");
6514   }
6515   // Square root
6516   case PPC::BI__builtin_vsx_xvsqrtsp:
6517   case PPC::BI__builtin_vsx_xvsqrtdp: {
6518     llvm::Type *ResultType = ConvertType(E->getType());
6519     Value *X = EmitScalarExpr(E->getArg(0));
6520     ID = Intrinsic::sqrt;
6521     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6522     return Builder.CreateCall(F, X);
6523   }
6524   // Count leading zeros
6525   case PPC::BI__builtin_altivec_vclzb:
6526   case PPC::BI__builtin_altivec_vclzh:
6527   case PPC::BI__builtin_altivec_vclzw:
6528   case PPC::BI__builtin_altivec_vclzd: {
6529     llvm::Type *ResultType = ConvertType(E->getType());
6530     Value *X = EmitScalarExpr(E->getArg(0));
6531     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6532     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6533     return Builder.CreateCall(F, {X, Undef});
6534   }
6535   // Copy sign
6536   case PPC::BI__builtin_vsx_xvcpsgnsp:
6537   case PPC::BI__builtin_vsx_xvcpsgndp: {
6538     llvm::Type *ResultType = ConvertType(E->getType());
6539     Value *X = EmitScalarExpr(E->getArg(0));
6540     Value *Y = EmitScalarExpr(E->getArg(1));
6541     ID = Intrinsic::copysign;
6542     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6543     return Builder.CreateCall(F, {X, Y});
6544   }
6545   // Rounding/truncation
6546   case PPC::BI__builtin_vsx_xvrspip:
6547   case PPC::BI__builtin_vsx_xvrdpip:
6548   case PPC::BI__builtin_vsx_xvrdpim:
6549   case PPC::BI__builtin_vsx_xvrspim:
6550   case PPC::BI__builtin_vsx_xvrdpi:
6551   case PPC::BI__builtin_vsx_xvrspi:
6552   case PPC::BI__builtin_vsx_xvrdpic:
6553   case PPC::BI__builtin_vsx_xvrspic:
6554   case PPC::BI__builtin_vsx_xvrdpiz:
6555   case PPC::BI__builtin_vsx_xvrspiz: {
6556     llvm::Type *ResultType = ConvertType(E->getType());
6557     Value *X = EmitScalarExpr(E->getArg(0));
6558     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
6559         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
6560       ID = Intrinsic::floor;
6561     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
6562              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
6563       ID = Intrinsic::round;
6564     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
6565              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
6566       ID = Intrinsic::nearbyint;
6567     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
6568              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
6569       ID = Intrinsic::ceil;
6570     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
6571              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
6572       ID = Intrinsic::trunc;
6573     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6574     return Builder.CreateCall(F, X);
6575   }
6576   // FMA variations
6577   case PPC::BI__builtin_vsx_xvmaddadp:
6578   case PPC::BI__builtin_vsx_xvmaddasp:
6579   case PPC::BI__builtin_vsx_xvnmaddadp:
6580   case PPC::BI__builtin_vsx_xvnmaddasp:
6581   case PPC::BI__builtin_vsx_xvmsubadp:
6582   case PPC::BI__builtin_vsx_xvmsubasp:
6583   case PPC::BI__builtin_vsx_xvnmsubadp:
6584   case PPC::BI__builtin_vsx_xvnmsubasp: {
6585     llvm::Type *ResultType = ConvertType(E->getType());
6586     Value *X = EmitScalarExpr(E->getArg(0));
6587     Value *Y = EmitScalarExpr(E->getArg(1));
6588     Value *Z = EmitScalarExpr(E->getArg(2));
6589     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6590     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6591     switch (BuiltinID) {
6592       case PPC::BI__builtin_vsx_xvmaddadp:
6593       case PPC::BI__builtin_vsx_xvmaddasp:
6594         return Builder.CreateCall(F, {X, Y, Z});
6595       case PPC::BI__builtin_vsx_xvnmaddadp:
6596       case PPC::BI__builtin_vsx_xvnmaddasp:
6597         return Builder.CreateFSub(Zero,
6598                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
6599       case PPC::BI__builtin_vsx_xvmsubadp:
6600       case PPC::BI__builtin_vsx_xvmsubasp:
6601         return Builder.CreateCall(F,
6602                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6603       case PPC::BI__builtin_vsx_xvnmsubadp:
6604       case PPC::BI__builtin_vsx_xvnmsubasp:
6605         Value *FsubRes =
6606           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6607         return Builder.CreateFSub(Zero, FsubRes, "sub");
6608     }
6609     llvm_unreachable("Unknown FMA operation");
6610     return nullptr; // Suppress no-return warning
6611   }
6612   }
6613 }
6614 
6615 // Emit an intrinsic that has 1 float or double.
6616 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
6617                                  const CallExpr *E,
6618                                  unsigned IntrinsicID) {
6619   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6620 
6621   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6622   return CGF.Builder.CreateCall(F, Src0);
6623 }
6624 
6625 // Emit an intrinsic that has 3 float or double operands.
6626 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
6627                                    const CallExpr *E,
6628                                    unsigned IntrinsicID) {
6629   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6630   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6631   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
6632 
6633   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6634   return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
6635 }
6636 
6637 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
6638 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
6639                                const CallExpr *E,
6640                                unsigned IntrinsicID) {
6641   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6642   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6643 
6644   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6645   return CGF.Builder.CreateCall(F, {Src0, Src1});
6646 }
6647 
6648 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
6649                                               const CallExpr *E) {
6650   switch (BuiltinID) {
6651   case AMDGPU::BI__builtin_amdgpu_div_scale:
6652   case AMDGPU::BI__builtin_amdgpu_div_scalef: {
6653     // Translate from the intrinsics's struct return to the builtin's out
6654     // argument.
6655 
6656     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
6657 
6658     llvm::Value *X = EmitScalarExpr(E->getArg(0));
6659     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
6660     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
6661 
6662     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
6663                                            X->getType());
6664 
6665     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
6666 
6667     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
6668     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
6669 
6670     llvm::Type *RealFlagType
6671       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
6672 
6673     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
6674     Builder.CreateStore(FlagExt, FlagOutPtr);
6675     return Result;
6676   }
6677   case AMDGPU::BI__builtin_amdgpu_div_fmas:
6678   case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
6679     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
6680     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
6681     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
6682     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
6683 
6684     llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
6685                                       Src0->getType());
6686     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
6687     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
6688   }
6689   case AMDGPU::BI__builtin_amdgpu_div_fixup:
6690   case AMDGPU::BI__builtin_amdgpu_div_fixupf:
6691     return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
6692   case AMDGPU::BI__builtin_amdgpu_trig_preop:
6693   case AMDGPU::BI__builtin_amdgpu_trig_preopf:
6694     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
6695   case AMDGPU::BI__builtin_amdgpu_rcp:
6696   case AMDGPU::BI__builtin_amdgpu_rcpf:
6697     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
6698   case AMDGPU::BI__builtin_amdgpu_rsq:
6699   case AMDGPU::BI__builtin_amdgpu_rsqf:
6700     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
6701   case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
6702   case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
6703     return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
6704   case AMDGPU::BI__builtin_amdgpu_ldexp:
6705   case AMDGPU::BI__builtin_amdgpu_ldexpf:
6706     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
6707   case AMDGPU::BI__builtin_amdgpu_class:
6708   case AMDGPU::BI__builtin_amdgpu_classf:
6709     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
6710    default:
6711     return nullptr;
6712   }
6713 }
6714 
6715 /// Handle a SystemZ function in which the final argument is a pointer
6716 /// to an int that receives the post-instruction CC value.  At the LLVM level
6717 /// this is represented as a function that returns a {result, cc} pair.
6718 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
6719                                          unsigned IntrinsicID,
6720                                          const CallExpr *E) {
6721   unsigned NumArgs = E->getNumArgs() - 1;
6722   SmallVector<Value *, 8> Args(NumArgs);
6723   for (unsigned I = 0; I < NumArgs; ++I)
6724     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
6725   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
6726   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
6727   Value *Call = CGF.Builder.CreateCall(F, Args);
6728   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
6729   CGF.Builder.CreateStore(CC, CCPtr);
6730   return CGF.Builder.CreateExtractValue(Call, 0);
6731 }
6732 
6733 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
6734                                                const CallExpr *E) {
6735   switch (BuiltinID) {
6736   case SystemZ::BI__builtin_tbegin: {
6737     Value *TDB = EmitScalarExpr(E->getArg(0));
6738     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6739     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
6740     return Builder.CreateCall(F, {TDB, Control});
6741   }
6742   case SystemZ::BI__builtin_tbegin_nofloat: {
6743     Value *TDB = EmitScalarExpr(E->getArg(0));
6744     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6745     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
6746     return Builder.CreateCall(F, {TDB, Control});
6747   }
6748   case SystemZ::BI__builtin_tbeginc: {
6749     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
6750     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
6751     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
6752     return Builder.CreateCall(F, {TDB, Control});
6753   }
6754   case SystemZ::BI__builtin_tabort: {
6755     Value *Data = EmitScalarExpr(E->getArg(0));
6756     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
6757     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
6758   }
6759   case SystemZ::BI__builtin_non_tx_store: {
6760     Value *Address = EmitScalarExpr(E->getArg(0));
6761     Value *Data = EmitScalarExpr(E->getArg(1));
6762     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
6763     return Builder.CreateCall(F, {Data, Address});
6764   }
6765 
6766   // Vector builtins.  Note that most vector builtins are mapped automatically
6767   // to target-specific LLVM intrinsics.  The ones handled specially here can
6768   // be represented via standard LLVM IR, which is preferable to enable common
6769   // LLVM optimizations.
6770 
6771   case SystemZ::BI__builtin_s390_vpopctb:
6772   case SystemZ::BI__builtin_s390_vpopcth:
6773   case SystemZ::BI__builtin_s390_vpopctf:
6774   case SystemZ::BI__builtin_s390_vpopctg: {
6775     llvm::Type *ResultType = ConvertType(E->getType());
6776     Value *X = EmitScalarExpr(E->getArg(0));
6777     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
6778     return Builder.CreateCall(F, X);
6779   }
6780 
6781   case SystemZ::BI__builtin_s390_vclzb:
6782   case SystemZ::BI__builtin_s390_vclzh:
6783   case SystemZ::BI__builtin_s390_vclzf:
6784   case SystemZ::BI__builtin_s390_vclzg: {
6785     llvm::Type *ResultType = ConvertType(E->getType());
6786     Value *X = EmitScalarExpr(E->getArg(0));
6787     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6788     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6789     return Builder.CreateCall(F, {X, Undef});
6790   }
6791 
6792   case SystemZ::BI__builtin_s390_vctzb:
6793   case SystemZ::BI__builtin_s390_vctzh:
6794   case SystemZ::BI__builtin_s390_vctzf:
6795   case SystemZ::BI__builtin_s390_vctzg: {
6796     llvm::Type *ResultType = ConvertType(E->getType());
6797     Value *X = EmitScalarExpr(E->getArg(0));
6798     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6799     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
6800     return Builder.CreateCall(F, {X, Undef});
6801   }
6802 
6803   case SystemZ::BI__builtin_s390_vfsqdb: {
6804     llvm::Type *ResultType = ConvertType(E->getType());
6805     Value *X = EmitScalarExpr(E->getArg(0));
6806     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
6807     return Builder.CreateCall(F, X);
6808   }
6809   case SystemZ::BI__builtin_s390_vfmadb: {
6810     llvm::Type *ResultType = ConvertType(E->getType());
6811     Value *X = EmitScalarExpr(E->getArg(0));
6812     Value *Y = EmitScalarExpr(E->getArg(1));
6813     Value *Z = EmitScalarExpr(E->getArg(2));
6814     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6815     return Builder.CreateCall(F, {X, Y, Z});
6816   }
6817   case SystemZ::BI__builtin_s390_vfmsdb: {
6818     llvm::Type *ResultType = ConvertType(E->getType());
6819     Value *X = EmitScalarExpr(E->getArg(0));
6820     Value *Y = EmitScalarExpr(E->getArg(1));
6821     Value *Z = EmitScalarExpr(E->getArg(2));
6822     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6823     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6824     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6825   }
6826   case SystemZ::BI__builtin_s390_vflpdb: {
6827     llvm::Type *ResultType = ConvertType(E->getType());
6828     Value *X = EmitScalarExpr(E->getArg(0));
6829     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
6830     return Builder.CreateCall(F, X);
6831   }
6832   case SystemZ::BI__builtin_s390_vflndb: {
6833     llvm::Type *ResultType = ConvertType(E->getType());
6834     Value *X = EmitScalarExpr(E->getArg(0));
6835     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6836     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
6837     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
6838   }
6839   case SystemZ::BI__builtin_s390_vfidb: {
6840     llvm::Type *ResultType = ConvertType(E->getType());
6841     Value *X = EmitScalarExpr(E->getArg(0));
6842     // Constant-fold the M4 and M5 mask arguments.
6843     llvm::APSInt M4, M5;
6844     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
6845     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
6846     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
6847     (void)IsConstM4; (void)IsConstM5;
6848     // Check whether this instance of vfidb can be represented via a LLVM
6849     // standard intrinsic.  We only support some combinations of M4 and M5.
6850     Intrinsic::ID ID = Intrinsic::not_intrinsic;
6851     switch (M4.getZExtValue()) {
6852     default: break;
6853     case 0:  // IEEE-inexact exception allowed
6854       switch (M5.getZExtValue()) {
6855       default: break;
6856       case 0: ID = Intrinsic::rint; break;
6857       }
6858       break;
6859     case 4:  // IEEE-inexact exception suppressed
6860       switch (M5.getZExtValue()) {
6861       default: break;
6862       case 0: ID = Intrinsic::nearbyint; break;
6863       case 1: ID = Intrinsic::round; break;
6864       case 5: ID = Intrinsic::trunc; break;
6865       case 6: ID = Intrinsic::ceil; break;
6866       case 7: ID = Intrinsic::floor; break;
6867       }
6868       break;
6869     }
6870     if (ID != Intrinsic::not_intrinsic) {
6871       Function *F = CGM.getIntrinsic(ID, ResultType);
6872       return Builder.CreateCall(F, X);
6873     }
6874     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
6875     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
6876     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
6877     return Builder.CreateCall(F, {X, M4Value, M5Value});
6878   }
6879 
6880   // Vector intrisincs that output the post-instruction CC value.
6881 
6882 #define INTRINSIC_WITH_CC(NAME) \
6883     case SystemZ::BI__builtin_##NAME: \
6884       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
6885 
6886   INTRINSIC_WITH_CC(s390_vpkshs);
6887   INTRINSIC_WITH_CC(s390_vpksfs);
6888   INTRINSIC_WITH_CC(s390_vpksgs);
6889 
6890   INTRINSIC_WITH_CC(s390_vpklshs);
6891   INTRINSIC_WITH_CC(s390_vpklsfs);
6892   INTRINSIC_WITH_CC(s390_vpklsgs);
6893 
6894   INTRINSIC_WITH_CC(s390_vceqbs);
6895   INTRINSIC_WITH_CC(s390_vceqhs);
6896   INTRINSIC_WITH_CC(s390_vceqfs);
6897   INTRINSIC_WITH_CC(s390_vceqgs);
6898 
6899   INTRINSIC_WITH_CC(s390_vchbs);
6900   INTRINSIC_WITH_CC(s390_vchhs);
6901   INTRINSIC_WITH_CC(s390_vchfs);
6902   INTRINSIC_WITH_CC(s390_vchgs);
6903 
6904   INTRINSIC_WITH_CC(s390_vchlbs);
6905   INTRINSIC_WITH_CC(s390_vchlhs);
6906   INTRINSIC_WITH_CC(s390_vchlfs);
6907   INTRINSIC_WITH_CC(s390_vchlgs);
6908 
6909   INTRINSIC_WITH_CC(s390_vfaebs);
6910   INTRINSIC_WITH_CC(s390_vfaehs);
6911   INTRINSIC_WITH_CC(s390_vfaefs);
6912 
6913   INTRINSIC_WITH_CC(s390_vfaezbs);
6914   INTRINSIC_WITH_CC(s390_vfaezhs);
6915   INTRINSIC_WITH_CC(s390_vfaezfs);
6916 
6917   INTRINSIC_WITH_CC(s390_vfeebs);
6918   INTRINSIC_WITH_CC(s390_vfeehs);
6919   INTRINSIC_WITH_CC(s390_vfeefs);
6920 
6921   INTRINSIC_WITH_CC(s390_vfeezbs);
6922   INTRINSIC_WITH_CC(s390_vfeezhs);
6923   INTRINSIC_WITH_CC(s390_vfeezfs);
6924 
6925   INTRINSIC_WITH_CC(s390_vfenebs);
6926   INTRINSIC_WITH_CC(s390_vfenehs);
6927   INTRINSIC_WITH_CC(s390_vfenefs);
6928 
6929   INTRINSIC_WITH_CC(s390_vfenezbs);
6930   INTRINSIC_WITH_CC(s390_vfenezhs);
6931   INTRINSIC_WITH_CC(s390_vfenezfs);
6932 
6933   INTRINSIC_WITH_CC(s390_vistrbs);
6934   INTRINSIC_WITH_CC(s390_vistrhs);
6935   INTRINSIC_WITH_CC(s390_vistrfs);
6936 
6937   INTRINSIC_WITH_CC(s390_vstrcbs);
6938   INTRINSIC_WITH_CC(s390_vstrchs);
6939   INTRINSIC_WITH_CC(s390_vstrcfs);
6940 
6941   INTRINSIC_WITH_CC(s390_vstrczbs);
6942   INTRINSIC_WITH_CC(s390_vstrczhs);
6943   INTRINSIC_WITH_CC(s390_vstrczfs);
6944 
6945   INTRINSIC_WITH_CC(s390_vfcedbs);
6946   INTRINSIC_WITH_CC(s390_vfchdbs);
6947   INTRINSIC_WITH_CC(s390_vfchedbs);
6948 
6949   INTRINSIC_WITH_CC(s390_vftcidb);
6950 
6951 #undef INTRINSIC_WITH_CC
6952 
6953   default:
6954     return nullptr;
6955   }
6956 }
6957 
6958 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
6959                                              const CallExpr *E) {
6960   switch (BuiltinID) {
6961   case NVPTX::BI__nvvm_atom_add_gen_i:
6962   case NVPTX::BI__nvvm_atom_add_gen_l:
6963   case NVPTX::BI__nvvm_atom_add_gen_ll:
6964     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
6965 
6966   case NVPTX::BI__nvvm_atom_sub_gen_i:
6967   case NVPTX::BI__nvvm_atom_sub_gen_l:
6968   case NVPTX::BI__nvvm_atom_sub_gen_ll:
6969     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
6970 
6971   case NVPTX::BI__nvvm_atom_and_gen_i:
6972   case NVPTX::BI__nvvm_atom_and_gen_l:
6973   case NVPTX::BI__nvvm_atom_and_gen_ll:
6974     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
6975 
6976   case NVPTX::BI__nvvm_atom_or_gen_i:
6977   case NVPTX::BI__nvvm_atom_or_gen_l:
6978   case NVPTX::BI__nvvm_atom_or_gen_ll:
6979     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
6980 
6981   case NVPTX::BI__nvvm_atom_xor_gen_i:
6982   case NVPTX::BI__nvvm_atom_xor_gen_l:
6983   case NVPTX::BI__nvvm_atom_xor_gen_ll:
6984     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
6985 
6986   case NVPTX::BI__nvvm_atom_xchg_gen_i:
6987   case NVPTX::BI__nvvm_atom_xchg_gen_l:
6988   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
6989     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
6990 
6991   case NVPTX::BI__nvvm_atom_max_gen_i:
6992   case NVPTX::BI__nvvm_atom_max_gen_l:
6993   case NVPTX::BI__nvvm_atom_max_gen_ll:
6994     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
6995 
6996   case NVPTX::BI__nvvm_atom_max_gen_ui:
6997   case NVPTX::BI__nvvm_atom_max_gen_ul:
6998   case NVPTX::BI__nvvm_atom_max_gen_ull:
6999     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
7000 
7001   case NVPTX::BI__nvvm_atom_min_gen_i:
7002   case NVPTX::BI__nvvm_atom_min_gen_l:
7003   case NVPTX::BI__nvvm_atom_min_gen_ll:
7004     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
7005 
7006   case NVPTX::BI__nvvm_atom_min_gen_ui:
7007   case NVPTX::BI__nvvm_atom_min_gen_ul:
7008   case NVPTX::BI__nvvm_atom_min_gen_ull:
7009     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
7010 
7011   case NVPTX::BI__nvvm_atom_cas_gen_i:
7012   case NVPTX::BI__nvvm_atom_cas_gen_l:
7013   case NVPTX::BI__nvvm_atom_cas_gen_ll:
7014     return MakeAtomicCmpXchgValue(*this, E, true);
7015 
7016   case NVPTX::BI__nvvm_atom_add_gen_f: {
7017     Value *Ptr = EmitScalarExpr(E->getArg(0));
7018     Value *Val = EmitScalarExpr(E->getArg(1));
7019     // atomicrmw only deals with integer arguments so we need to use
7020     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
7021     Value *FnALAF32 =
7022         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
7023     return Builder.CreateCall(FnALAF32, {Ptr, Val});
7024   }
7025 
7026   default:
7027     return nullptr;
7028   }
7029 }
7030 
7031 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
7032                                                    const CallExpr *E) {
7033   switch (BuiltinID) {
7034   case WebAssembly::BI__builtin_wasm_page_size: {
7035     llvm::Type *ResultType = ConvertType(E->getType());
7036     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_page_size, ResultType);
7037     return Builder.CreateCall(Callee);
7038   }
7039 
7040   default:
7041     return nullptr;
7042   }
7043 }
7044